In [2]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import cartopy.crs as ccrs
from datetime import datetime, timedelta
import matplotlib.gridspec as gridspec
from matplotlib import animation, rc
import matplotlib.colors as colors
from IPython.display import HTML
import collections
import seaborn as sns
from collections import OrderedDict
from sklearn.neighbors import KDTree, BallTree
from sklearn.metrics import pairwise
from pandas.tseries.offsets import MonthEnd
import shapefile
from shapely.geometry import shape, Point, Polygon
import scipy.stats as stats
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import pysal
import scipy.stats
from matplotlib.colors import LogNorm
from matplotlib.font_manager import FontProperties
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes
from mpl_toolkits.axes_grid1.inset_locator import mark_inset
import cartopy.feature as feature
from zipfile import ZipFile
from lxml import html
import xml.etree.ElementTree as et
import h5py
In [3]:
output_path = '/Users/danielfisher/Dropbox/working_documents/papers/globalgasflaring/figures/revision_1'
output_path_ds = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/output'
In [4]:
def get_arcmin(x):
'''
rounds the decimal fraction of a degree to the nearest arc minute
and returns an integer encoding: degrees * 100 + arcminutes
'''
neg_values = x < 0
abs_x = np.abs(x)
floor_x = np.floor(abs_x)
decile = abs_x - floor_x
minute = np.around(decile * 60) # round to nearest arcmin
minute_fraction = minute*0.01 # convert to fractional value (ranges from 0 to 0.6)
max_minute = minute_fraction > 0.59
floor_x[neg_values] *= -1
floor_x[neg_values] -= minute_fraction[neg_values]
floor_x[~neg_values] += minute_fraction[~neg_values]
# deal with edge cases, and just round them all up
if np.sum(max_minute) > 0:
floor_x[max_minute] = np.around(floor_x[max_minute])
# now to get rid of rounding errors and allow comparison multiply by 100 and convert to int
floor_x = (floor_x * 100).astype(int)
return floor_x
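A quick illustration (example values chosen here, not taken from the datasets) of the encoding get_arcmin produces: the returned integer is degrees*100 plus arcminutes, with fractions just over the 59.5 arcmin boundary rounded up to the next whole degree.
In [ ]:
# illustrative only: 51.485 deg -> 51 deg 29', i.e. 5129; -1.99 deg -> -(1 deg 59'), i.e. -159;
# 0.9999 deg rounds up to the next whole degree, i.e. 100
print get_arcmin(np.array([51.485, -1.99, 0.9999]))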
Load in the hotspot dataframes for ATX and SLS:
In [5]:
hotspot_path_ats = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/processed/l3/all_sensors/v2/all_flares_atx_adaptive.csv'
hotspot_path_sls = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/processed/l3/all_sensors/v2/all_flares_sls_adaptive.csv'
ats_hotspot_df = pd.read_csv(hotspot_path_ats)
sls_hotspot_df = pd.read_csv(hotspot_path_sls)
ats_hotspot_df['sample_counts_flare'] = 1
sls_hotspot_df['sample_counts_flare'] = 1
In [730]:
ats_hotspot_df.columns
Out[730]:
In [6]:
# slstr radiance and frp adjustment
In [7]:
sls_hotspot_df.frp *= 1.12
sls_hotspot_df.swir_radiances *= 1.12
sls_hotspot_df.swir_radiances_22 *= 1.20
Load in the sampling dataframes for ATX and SLS:
In [8]:
# generate ATX sampling dataframe
path = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/processed/l3/all_sensors/v2/'
df_names = ['all_sampling_at1_adaptive.csv', 'all_sampling_at2_adaptive.csv', 'all_sampling_ats_adaptive.csv']
df_list = []
for df in df_names:
df_list.append(pd.read_csv(os.path.join(path, df)))
ats_hotspot_sampling_df = pd.concat(df_list)
ats_hotspot_sampling_df['sample_counts_all'] = 1
In [9]:
#sampling_path_ats = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/processed/l3/all_sensors/all_sampling_ats.csv'
sampling_path_sls = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/processed/l3/all_sensors/v2/all_sampling_sls_adaptive.csv'
#ats_hotspot_sampling_df = pd.read_csv(sampling_path_ats)
sls_hotspot_sampling_df = pd.read_csv(sampling_path_sls)
#ats_hotspot_sampling_df['sample_counts_all'] = 1
sls_hotspot_sampling_df['sample_counts_all'] = 1
Load in the volcano dataframe:
In [10]:
volc_path = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/external/volcanoes/volcanoes.csv'
volc_df = pd.read_csv(volc_path)
volc_df.dropna(inplace=True)
Generate the countries dataframe for the hotspot data. Each unique hotspot location is assigned a country based on the EEZ land boundaries shape file.
In [11]:
def extract_countries():
c_shp = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/external/borders/EEZ_land_union_v2_201410/EEZ_land_v2_201410.shp'
shape_file = shapefile.Reader(c_shp)
# iterate over the features of the shapefile
countries = {}
for feature in shape_file.shapeRecords():
country_name = feature.record[2]
country_shp = shape(feature.shape.__geo_interface__)
#eez_boundaries[eez_name] = eez_shp.simplify(0.2)
countries[country_name] = country_shp
return countries
def create_countries_dataframe(countries, df):
cols = ['lats_arcmin', 'lons_arcmin', 'lats', 'lons']
countries_df = df.drop_duplicates(['lats_arcmin', 'lons_arcmin'])[cols].copy()
country_list = []
for i, row in countries_df.iterrows():
country_found = False
point = Point(row.lons,row.lats)
for k in countries:
poly = countries[k]
if point.within(poly):
country_list.append(k)
country_found = True
break
if country_found:
continue
# if we get here no country has been found, so flag it
country_list.append('Without Country')
countries_df['countries'] = country_list
return countries_df
countries_dict = extract_countries()
ats_countries_df = create_countries_dataframe(countries_dict, ats_hotspot_df)
sls_countries_df = create_countries_dataframe(countries_dict, sls_hotspot_df)
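The point-in-polygon loop in create_countries_dataframe tests every unique hotspot location against every EEZ polygon, which can be slow for large shapefiles. A possible speed-up (a sketch only, not used above) is to prepare each country polygon once using shapely's prepared geometries:
In [ ]:
# optional sketch: prepared geometries speed up repeated point-in-polygon tests
from shapely.prepared import prep
prepared_countries = {name: prep(poly) for name, poly in countries_dict.items()}
# prepared_countries[name].contains(Point(lon, lat)) could then replace point.within(poly)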
Load in the BCM data from DMSP for comparison against ATSR dataset:
In [12]:
bcm_path = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/external/dmsp/BCM_Global_20110223.xlsx'
cols=['Location', 'Year', 'MEAN BCM']
bcm_df_dict = pd.read_excel(bcm_path, sheetname=None)
bcm_df = pd.concat(bcm_df_dict).reset_index()[['Year', 'Location', 'MEAN BCM']]
bcm_df.loc[bcm_df.Location=='Russia_Combined', 'Location'] = 'Russia'
bcm_df.loc[bcm_df.Location=='USA_Combined', 'Location'] = 'USA'
bcm_df.rename(columns={'Location': 'countries', 'Year': 'year', 'MEAN BCM': 'bcm'}, inplace=True)
Load in the VIIRS hotspot data for Iraq for comparison against ATSR:
In [543]:
viirs_iraq_hotspot_path = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/external/viirs_iraq_flares/hotspots_laads.xlsx'
viirs_iraq_hotspot_df = pd.read_excel(viirs_iraq_hotspot_path)
viirs_iraq_hotspot_df.rename(columns={'lat': 'lats', 'lon': 'lons'}, inplace=True)
Load in VIIRS hotspot data for bias assessment of SLSTR:
In [14]:
viirs_bias_hotspot_path = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/external/slstr_bias_assessment'
viirs_got_hotspot_df = pd.read_csv(os.path.join(viirs_bias_hotspot_path, 'got.csv'))
viirs_got_hotspot_df.rename(columns={'lat': 'lats', 'lon': 'lons'}, inplace=True)
viirs_libya_hotspot_df = pd.read_csv(os.path.join(viirs_bias_hotspot_path, 'libya.csv'))
viirs_libya_hotspot_df.rename(columns={'lat': 'lats', 'lon': 'lons'}, inplace=True)
viirs_oman_hotspot_df = pd.read_csv(os.path.join(viirs_bias_hotspot_path, 'oman.csv'))
viirs_oman_hotspot_df.rename(columns={'lat': 'lats', 'lon': 'lons'}, inplace=True)
Load in the VIIRS industrial flaring activity for comparison against ATSR:
In [15]:
viirs_industrial_path = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/external/viirs_industrial_sites'
kmz_fname = 'doc.kml'
doc = et.parse(os.path.join(viirs_industrial_path, kmz_fname))
In [16]:
nmsp = '{http://www.opengis.net/kml/2.2}'
df_list = []
for pm in doc.iterfind('.//{0}Placemark'.format(nmsp)):
name = (pm.find('{0}name'.format(nmsp)).text)
for ls in pm.iterfind('.//{0}coordinates'.format(nmsp)):
pos = np.fromstring(ls.text.strip().replace('\n',''), dtype=float, sep=', ')
lon = pos[0]
lat = pos[1]
df = pd.DataFrame([[lat,lon,name]], columns=['lats', 'lons', 'Name'])
df_list.append(df)
break
viirs_industrial_df = pd.concat(df_list)
viirs_industrial_df['lats_arcmin'] = get_arcmin(viirs_industrial_df.lats.values)
viirs_industrial_df['lons_arcmin'] = get_arcmin(viirs_industrial_df.lons.values)
id_dict = {'Nonmetal-mineral':1,
'Ferrous-metal': 2,
'Coal-processing': 3,
'Oil/gas': 4
}
viirs_industrial_df['name_id'] = viirs_industrial_df.Name.map(id_dict)
In [17]:
viirs_countries_df = create_countries_dataframe(countries_dict, viirs_industrial_df)
Load in NightFire data for comparison against SLSTR:
In [18]:
nightfire_path = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/external/viirs_nightfire'
df_list = []
for f in os.listdir(nightfire_path):
df_list.append(pd.read_csv(os.path.join(nightfire_path, f)))
nightfire_df = pd.concat(df_list)
nightfire_df.rename(columns={'Lat_GMTCO': 'lats', 'Lon_GMTCO': 'lons'}, inplace=True)
nightfire_df['lats_arcmin'] = get_arcmin(nightfire_df.lats.values)
nightfire_df['lons_arcmin'] = get_arcmin(nightfire_df.lons.values)
In [19]:
nightfire_countries_df = create_countries_dataframe(countries_dict, nightfire_df)
In [20]:
def load_viirs_zenith():
path = '/Users/danielfisher/Projects/kcl-globalgasflaring/data/external/viirs_zenith/'
f = 'GMODO-SVM01-SVM03-SVM04-SVM05_npp_d20150705_t0254234_e0255476_b19092_c20171108213818198685_noaa_ops.h5'
ds = h5py.File(os.path.join(path, f))
vza = ds['All_Data']['VIIRS-MOD-GEO_All']['SatelliteZenithAngle'][:]
return vza[0,:]
viirs_zenith_angles = load_viirs_zenith()
Some of the ATX sensors have various issues during their operational lifetimes (see the notes on this at the bottom). It therefore makes sense to prioritise data from one sensor over another during any overlapping periods. Overlaps are removed from the persistent hotspot and sampling dataframes below before any further analysis is performed. The justifications for the sensor prioritisation in each overlapping period are given at the bottom of the notebook. In some instances poor sensor operational behaviour remains, but if no overlapping data are available we have to use what is given to us. The masking is only applied to the ATX data, not SLSTR:
In [21]:
# persistent hotspot df
mask = (~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 1995)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 2003)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 1996) & (ats_hotspot_df.month == 1)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 1996) & (ats_hotspot_df.month == 2)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 1996) & (ats_hotspot_df.month == 3)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 1996) & (ats_hotspot_df.month == 4)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 1996) & (ats_hotspot_df.month == 5)) &
~((ats_hotspot_df.sensor == 'at1' ) & (ats_hotspot_df.year == 1996) & (ats_hotspot_df.month == 7)) &
~((ats_hotspot_df.sensor == 'at1' ) & (ats_hotspot_df.year == 1996) & (ats_hotspot_df.month == 8)) &
~((ats_hotspot_df.sensor == 'at1' ) & (ats_hotspot_df.year == 1996) & (ats_hotspot_df.month == 9)) &
~((ats_hotspot_df.sensor == 'at1' ) & (ats_hotspot_df.year == 1996) & (ats_hotspot_df.month == 10)) &
~((ats_hotspot_df.sensor == 'at1' ) & (ats_hotspot_df.year == 1996) & (ats_hotspot_df.month == 11)) &
~((ats_hotspot_df.sensor == 'at1' ) & (ats_hotspot_df.year == 1996) & (ats_hotspot_df.month == 12)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 2002) & (ats_hotspot_df.month == 4)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 2002) & (ats_hotspot_df.month == 5)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 2002) & (ats_hotspot_df.month == 6)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 2002) & (ats_hotspot_df.month == 7)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 2002) & (ats_hotspot_df.month == 8)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 2002) & (ats_hotspot_df.month == 9)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 2002) & (ats_hotspot_df.month == 10)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 2002) & (ats_hotspot_df.month == 11)) &
~((ats_hotspot_df.sensor == 'at2' ) & (ats_hotspot_df.year == 2002) & (ats_hotspot_df.month == 12))
)
ats_hotspot_df = ats_hotspot_df[mask]
In [22]:
print ats_hotspot_df.drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
In [23]:
# all sampling dataframe
mask = (~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 1995)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 2003)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 1996) & (ats_hotspot_sampling_df.month == 1)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 1996) & (ats_hotspot_sampling_df.month == 2)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 1996) & (ats_hotspot_sampling_df.month == 3)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 1996) & (ats_hotspot_sampling_df.month == 4)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 1996) & (ats_hotspot_sampling_df.month == 5)) &
~((ats_hotspot_sampling_df.sensor == 'at1' ) & (ats_hotspot_sampling_df.year == 1996) & (ats_hotspot_sampling_df.month == 7)) &
~((ats_hotspot_sampling_df.sensor == 'at1' ) & (ats_hotspot_sampling_df.year == 1996) & (ats_hotspot_sampling_df.month == 8)) &
~((ats_hotspot_sampling_df.sensor == 'at1' ) & (ats_hotspot_sampling_df.year == 1996) & (ats_hotspot_sampling_df.month == 9)) &
~((ats_hotspot_sampling_df.sensor == 'at1' ) & (ats_hotspot_sampling_df.year == 1996) & (ats_hotspot_sampling_df.month == 10)) &
~((ats_hotspot_sampling_df.sensor == 'at1' ) & (ats_hotspot_sampling_df.year == 1996) & (ats_hotspot_sampling_df.month == 11)) &
~((ats_hotspot_sampling_df.sensor == 'at1' ) & (ats_hotspot_sampling_df.year == 1996) & (ats_hotspot_sampling_df.month == 12)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 2002) & (ats_hotspot_sampling_df.month == 4)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 2002) & (ats_hotspot_sampling_df.month == 5)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 2002) & (ats_hotspot_sampling_df.month == 6)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 2002) & (ats_hotspot_sampling_df.month == 7)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 2002) & (ats_hotspot_sampling_df.month == 8)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 2002) & (ats_hotspot_sampling_df.month == 9)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 2002) & (ats_hotspot_sampling_df.month == 10)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 2002) & (ats_hotspot_sampling_df.month == 11)) &
~((ats_hotspot_sampling_df.sensor == 'at2' ) & (ats_hotspot_sampling_df.year == 2002) & (ats_hotspot_sampling_df.month == 12))
)
ats_hotspot_sampling_df = ats_hotspot_sampling_df[mask]
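The same exclusion rules are applied to both dataframes above. Purely as a sketch (not used here), the mask could also be built from a single list of (sensor, year, months) rules, which keeps the two cells in sync:
In [ ]:
# sketch only: build the overlap-exclusion mask from a rule list
# (months=None means exclude the whole year); mirrors the exclusions applied above
exclusion_rules = [('at2', 1995, None),
                   ('at2', 2003, None),
                   ('at2', 1996, range(1, 6)),
                   ('at1', 1996, range(7, 13)),
                   ('at2', 2002, range(4, 13))]
def build_exclusion_mask(df, rules):
    keep = np.ones(df.shape[0], dtype=bool)
    for sensor, year, months in rules:
        drop = (df.sensor == sensor) & (df.year == year)
        if months is not None:
            drop &= df.month.isin(months)
        keep &= ~drop.values
    return keep
# e.g. ats_hotspot_df = ats_hotspot_df[build_exclusion_mask(ats_hotspot_df, exclusion_rules)]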
In [688]:
print ats_hotspot_sampling_df.drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
In [24]:
sls_hotspot_df_0317 = sls_hotspot_df[(sls_hotspot_df.year == 2017) & (sls_hotspot_df.month == 3)]
mask = ((sls_hotspot_df.year != 2016) & ~((sls_hotspot_df.year == 2017) &
(sls_hotspot_df.month <= 4)))
sls_hotspot_df = sls_hotspot_df[mask]
In [25]:
print sls_hotspot_df.drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
In [26]:
mask = ((sls_hotspot_sampling_df.year != 2016) & ~((sls_hotspot_sampling_df.year == 2017) &
(sls_hotspot_sampling_df.month <= 4)))
sls_hotspot_sampling_df = sls_hotspot_sampling_df[mask]
Various approaches are applied in this section to discriminate between flaring and non-flaring persistent hotspots. First the spectral ratio method is applied to the ATX and SLS datasets. In the case of ATX, all data with invalid backgrounds, saturated MWIR radiances (already NaN), or observed by AT1 have the MWIR radiance value set to NaN. The ratio is then computed between the SWIR and MWIR spectral radiances (with NaNs preserved through the ratio). For each thermal anomaly grid cell the median of all ratio values is computed across the entire time series; if this median falls between the assumed flaring thresholds the grid cell is classed as a flare.
In the case of SLSTR, no data are excluded prior to the calculation of the median, and no upper threshold is used.
Any AT1 data with associated AT2 or ATS data in its grid cell now has a flaring activity classification from the application of the spectral ratio method. However, some grid cells are only associated with AT1 flares and therefore have no valid classification. In such instances, the AT1 data is assigned a classification by majority vote over all grid cells within 1 degree across the entire time series, working on the assumption that flaring activity is clustered over certain spatial scales.
In [27]:
ats_ratio_df = ats_hotspot_df[['mwir_bg', 'sensor', 'swir_radiances', 'mwir_radiances',
'lats_arcmin', 'lons_arcmin', 'lats', 'lons', 'sample_counts_flare']].copy()
ats_ratio_df['mwir_saturated_counts'] = ats_ratio_df.mwir_radiances.isnull()
ats_ratio_df['mwir_no_bg_counts'] = ats_ratio_df.mwir_bg == -1
ats_ratio_df.mwir_bg.loc[ats_ratio_df.mwir_bg == -1] = np.nan # invalid background
ats_ratio_df.mwir_bg.loc[ats_ratio_df.sensor == 'at1'] = np.nan # no MWIR from AT1
eps= 0.001
ratio = ats_ratio_df.swir_radiances.values / \
((ats_ratio_df.mwir_radiances.values - ats_ratio_df.mwir_bg.values)+ eps)
ats_ratio_df['ratio'] = ratio
to_group = ['lats_arcmin', 'lons_arcmin']
to_agg = {'ratio': np.nanmedian,
'lats': np.mean,
'lons': np.mean,
'sample_counts_flare': np.sum,
'mwir_saturated_counts': np.sum,
'mwir_no_bg_counts': np.sum}
ats_ratio_df = ats_ratio_df.groupby(to_group, as_index=False).agg(to_agg)
ats_ratio_df['is_flare'] = (ats_ratio_df.ratio >= 1.61) & (ats_ratio_df.ratio < 8.35)
In [28]:
flaring_map = ats_ratio_df[ats_ratio_df.is_flare.astype('bool')][['lats_arcmin', 'lons_arcmin']]
non_flaring_map = ats_ratio_df[~ats_ratio_df.is_flare.astype('bool') & ats_ratio_df.ratio.notnull()][['lats_arcmin', 'lons_arcmin']]
null_map = ats_ratio_df[~ats_ratio_df.is_flare.astype('bool') & ats_ratio_df.ratio.isnull()][['lats_arcmin', 'lons_arcmin']]
In [29]:
for l, m in zip(['flares', 'not_flares', 'null'], [flaring_map, non_flaring_map, null_map]):
sub_df = ats_hotspot_df.merge(m, on=['lats_arcmin', 'lons_arcmin'])
print 'AT1', l, sub_df[sub_df.sensor == 'at1'].groupby(['lats_arcmin', 'lons_arcmin']).agg({'sample_counts_flare': np.sum}).shape[0]
print 'AT2', l, sub_df[sub_df.sensor == 'at2'].groupby(['lats_arcmin', 'lons_arcmin']).agg({'sample_counts_flare': np.sum}).shape[0]
print 'ATS', l, sub_df[sub_df.sensor == 'ats'].groupby(['lats_arcmin', 'lons_arcmin']).agg({'sample_counts_flare': np.sum}).shape[0]
print
In [30]:
sls_ratio_df = sls_hotspot_df[['sensor', 'swir_radiances', 'swir_radiances_22',
'lats_arcmin', 'lons_arcmin', 'lats', 'lons', 'sample_counts_flare']].copy()
eps= 0.001
ratio = sls_ratio_df.swir_radiances.values / (sls_ratio_df.swir_radiances_22 + eps)
sls_ratio_df['ratio'] = ratio
# here taking the median behaviour of the gas flare over all of its samples
# (note: possible bad data in the SLSTR record may affect this step)
to_group = ['lats_arcmin', 'lons_arcmin']
to_agg = {'ratio': np.nanmedian,
'lats': np.mean,
'lons': np.mean,
'sample_counts_flare': np.sum}
sls_ratio_df = sls_ratio_df.groupby(to_group, as_index=False).agg(to_agg)
# insert is_flare variable
sls_ratio_df['is_flare'] = (sls_ratio_df.ratio >= 0.94) & (sls_ratio_df.ratio < 1.92)
In [31]:
print 'SLS is flare', sls_ratio_df['is_flare'].sum()
print 'SLS not flare', (~sls_ratio_df['is_flare']).sum()
In [32]:
plt.close('all')
def planck_radiance(wvl, temp):
'''
wvl: wavelength (microns)
temp: temperature (kelvin)
'''
c1 = 1.19e-16 # first radiation constant (radiance form), W m2 sr-1
c2 = 1.44e-2 # second radiation constant, m K
wt = (wvl*1.e-6) * temp # m K
d = (wvl*1.e-6)**5 * (np.exp(c2/wt)-1)
return c1 / d * 1.e-6 # W m-2 sr-1 um-1
# set up fig
fig, (ax0, ax1, ax2) = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))
# spectral ratio plot
flare_size = 10000 # in sq. m.
pixel_size = 1e6 # in sq. m.
flare_area_pc = flare_size / pixel_size # unitless
temps = np.arange(0,3501, 1) # in K
spect_rad_swir = flare_area_pc * planck_radiance(1.6, temps)
spect_rad_swir_2 = flare_area_pc * planck_radiance(2.2, temps)
spect_rad_mwir = flare_area_pc * planck_radiance(3.7, temps)
spect_rad_lwir = flare_area_pc * planck_radiance(11, temps)
ratio_mwir = spect_rad_swir / spect_rad_mwir
ratio_swir = spect_rad_swir / spect_rad_swir_2
print 'MWIR ratio 1400K', ratio_mwir[1400]
print 'MWIR ratio 2800K', ratio_mwir[2800]
print 'SWIR ratio 1400K', ratio_swir[1400]
print 'SWIR ratio 2800K', ratio_swir[2800]
ax0.plot(temps, ratio_mwir, "k--", linewidth=1, label="$L_{1.6} / L_{3.7}$")
ax0.plot([-1,1400],[ratio_mwir[1400], ratio_mwir[1400]],'r--',linewidth=1)
ax0.plot([-1,2800],[ratio_mwir[2800], ratio_mwir[2800]],'r--',linewidth=1)
ax0.set_ylim([-0.25,10])
ax0.set_xlim([-0.25,3100])
ax0.plot(temps, ratio_swir, "k-", linewidth=1, label="$L_{1.6} / L_{2.2}$")
#ax0.plot([1400,3501],[ratio_swir[1400], ratio_swir[1400]],'r-',linewidth=1)
#ax0.plot([2800,3501],[ratio_swir[2800], ratio_swir[2800]],'r-',linewidth=1)
ax0.plot([-1,1400],[ratio_swir[1400], ratio_swir[1400]],'r-',linewidth=1)
ax0.plot([-1,2800],[ratio_swir[2800], ratio_swir[2800]],'r-',linewidth=1)
ax0.legend(prop={'size': 15})
ax0.set_xlabel("Temperature (K)", fontsize=16)
ax0.set_ylabel("Spectral Ratio", fontsize=16)
# histogram plots
ax1.hist(ats_ratio_df.ratio[(ats_ratio_df.ratio > 0) & (ats_ratio_df.ratio < 20)],
bins=100, normed=True, histtype='step', color='k')
ax1.plot([1.6,1.61], [0,0.4], 'r--', linewidth=2)
ax1.plot([8.34,8.34], [0,0.4], 'r--', linewidth=2)
ax1.set_ylim([0, 0.3])
ax1.set_xlabel('ATSR Spectral Ratio', fontsize=16)
ax1.set_ylabel('Normalised Frequency', fontsize=14)
ax2.hist(sls_ratio_df.ratio[(sls_ratio_df.ratio > 0) & (sls_ratio_df.ratio < 5)],
bins=100, normed=True, histtype='step', color='k')
ax2.plot([0.85,0.85], [0,1.5], 'r-', linewidth=2)
ax2.plot([1.92,1.92], [0,1.5], 'r-', linewidth=2)
ax2.set_xlabel('SLSTR Spectral Ratio', fontsize=16)
ax2.set_ylabel('Normalised Frequency', fontsize=14)
ax2.set_ylim([0, 1.4])
ax0.text(0.91, 0.05, '(a)', transform=ax0.transAxes, fontsize=14)
ax1.text(0.91, 0.05, '(b)', transform=ax1.transAxes, fontsize=14)
ax2.text(0.91, 0.05, '(c)', transform=ax2.transAxes, fontsize=14)
plt.savefig(os.path.join(output_path, 'ratio_plot.png'), bbox_inches='tight', dpi=600)
plt.show()
Assess the ratio distribution at the level of the sample (the above was done at the level of the flare). With this we can see how much sampling there is in the region of overlap in the above figure.
In [33]:
ats_sample_level_ratio = ats_hotspot_df.merge(ats_ratio_df, on=['lats_arcmin', 'lons_arcmin'])
sls_sample_level_ratio = sls_hotspot_df.merge(sls_ratio_df, on=['lats_arcmin', 'lons_arcmin'])
In [34]:
plt.close('all')
# set up fig
fig, (ax0, ax1) = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))
# histogram plots
ax0.hist(ats_sample_level_ratio.ratio[(ats_sample_level_ratio.ratio > 0) & (ats_sample_level_ratio.ratio < 20)],
bins=100, normed=False, histtype='step', color='k')
ax0.plot([1.6,1.61], [0,300000], 'r--', linewidth=2)
ax0.plot([8.34,8.34], [0,300000], 'r--', linewidth=2)
ax0.set_ylim([0, 220000])
ax0.set_xlabel('ATSR Spectral Ratio', fontsize=16)
ax0.set_ylabel('Frequency', fontsize=14)
ax1.hist(sls_sample_level_ratio.ratio[(sls_sample_level_ratio.ratio > 0) & (sls_sample_level_ratio.ratio < 5)],
bins=100, normed=False, histtype='step', color='k')
ax1.plot([0.85,0.85], [0,15500], 'r-', linewidth=2)
ax1.plot([1.92,1.92], [0,15500], 'r-', linewidth=2)
ax1.set_xlabel('SLSTR Spectral Ratio', fontsize=16)
ax1.set_ylabel('Frequency', fontsize=14)
ax1.set_ylim([0, 15500])
ax0.text(0.91, 0.05, '(a)', transform=ax0.transAxes, fontsize=14)
ax1.text(0.91, 0.05, '(b)', transform=ax1.transAxes, fontsize=14)
plt.savefig(os.path.join(output_path, 'sample_level_ratio_plot.png'), bbox_inches='tight', dpi=600)
plt.show()
In some cases we wish to reassign points with null ratios to the flaring class: currently, if the ratio is invalid we assume the point is not a flare, which may not be the case. In all cases where the ratio is invalid, the point is assigned the classification of its neighbours.
For AT1, no persistent hotspots have ratios associated with them. Where ATS or AT2 data are available in a grid cell, and their ratios are valid, the grid cell can still be assigned a valid ratio. However, not all AT1 grid cells have ATS or AT2 data associated with them; in such cases the assignment is made from the nearest neighbours.
For AT2 and ATS, if a grid cell has no valid ratio values we also assign the classification from nearby grid cells. The causes of missing ratios for these sensors are either no suitable background radiance estimate or a saturated MWIR channel: less frequently sampled gas flares may lack a valid background, while larger, more extreme flares may saturate the MWIR channel. So we assign the class based on the local neighbourhood.
In [35]:
def reassign_flares(ratio_df, k=1000, d=1):
'''
k = number of neighbors to consider
d = max distance to consider for neighbours
'''
current_flares = ratio_df.is_flare.values
flare_locations = np.array(zip(np.deg2rad(ratio_df.lats.values),
np.deg2rad(ratio_df.lons.values)))
# compare the flare locations to the potential locations in the orbit
locations_balltree = BallTree(flare_locations, metric='haversine')
distances, indexes = locations_balltree.query(flare_locations, k=k)
# distances mask
self_mask = np.rad2deg(distances) > 1/60 # do not consider the point itself
masked_distances = (np.rad2deg(distances) < d) & self_mask # and not points more than d degrees away
# update is flare based on neighbours
is_flare = np.ones(ratio_df.ratio.shape)*999
for i in xrange(is_flare.size):
# get the classifications of the flares closest to the current flare
closest_flares = ratio_df.is_flare[indexes[i,:]]
# mask based on distances
masked_closest = closest_flares[masked_distances[i, :]]
if len(masked_closest) > 0:
# take the nanmean of valid values. We will have nans nearby
# due to invalid backgrounds and observations from AT1
mean_local_type = np.nanmean(masked_closest)
# if mean is > 0.5, then majority is flare, else majority is industrial
if mean_local_type >= 0.5:
is_flare[i] = 1
else:
is_flare[i] = 0
else:
is_flare[i] = current_flares[i]
# store the neighbourhood-based classification
ratio_df['updated_flare'] = is_flare
return ratio_df
In [36]:
ats_ratio_df = reassign_flares(ats_ratio_df, d=1)
In [37]:
mask = ats_ratio_df.ratio.isnull()
ats_ratio_df.is_flare.loc[mask] = ats_ratio_df.updated_flare.loc[mask]
In [38]:
flaring_map = ats_ratio_df[ats_ratio_df.is_flare.astype('bool')][['lats_arcmin', 'lons_arcmin']]
non_flaring_map = ats_ratio_df[~ats_ratio_df.is_flare.astype('bool')][['lats_arcmin', 'lons_arcmin']]
In [39]:
for l, m in zip(['flares', 'not_flares'], [flaring_map, non_flaring_map]):
sub_df = ats_hotspot_df.merge(m, on=['lats_arcmin', 'lons_arcmin'])
print 'AT1', l, sub_df[sub_df.sensor == 'at1'].groupby(['lats_arcmin', 'lons_arcmin']).agg({'sample_counts_flare': np.sum}).shape[0]
print 'AT2', l, sub_df[sub_df.sensor == 'at2'].groupby(['lats_arcmin', 'lons_arcmin']).agg({'sample_counts_flare': np.sum}).shape[0]
print 'ATS', l, sub_df[sub_df.sensor == 'ats'].groupby(['lats_arcmin', 'lons_arcmin']).agg({'sample_counts_flare': np.sum}).shape[0]
print
As there is overlap between flaring and non-flaring thermal anomalies, further screening of the dataset is beneficial. This is particularly the case for volcanoes, which when detected can produce very large signals that are not representative of true flaring activity. Here we screen out any data points within ~10 km of a volcano that has been seen to be active at any point.
In [40]:
def dist_to_nearest_volcano(volc_df, ratio_df):
volc_lat_lon = np.dstack([np.deg2rad(volc_df.Latitude.values),
np.deg2rad(volc_df.Longitude.values)])[0]
volc_balltree = BallTree(volc_lat_lon, metric='haversine')
# get the unique flare lats and lons for assessment in kdtree
flare_locations = np.array(zip(np.deg2rad(ratio_df.lats.values),
np.deg2rad(ratio_df.lons.values)))
# compare the flare locations to the potential locations in the orbit
distances, indexes = volc_balltree.query(flare_locations, k=1)
# set up the dataframe to hold the distances
degree_distances = np.rad2deg(distances)
mask = degree_distances < 5.0/60 # if within ~10km of any volcano, then is volcano
ratio_df['is_volcano'] = mask
return ratio_df
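As a quick sanity check on the threshold used above (assuming a 6371 km mean Earth radius), 5 arcmin of great-circle distance is roughly the 10 km quoted:
In [ ]:
# 5 arcmin great-circle distance in km, assuming a 6371 km mean Earth radius
print np.deg2rad(5.0/60) * 6371.0  # ~9.3 km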
In [41]:
ats_ratio_df = dist_to_nearest_volcano(volc_df, ats_ratio_df)
sls_ratio_df = dist_to_nearest_volcano(volc_df, sls_ratio_df)
In [42]:
# we want to know how many more points are excluded by volcanoes, on top of the points already
# excluded by the ratio test (i.e. where the ratio is good)
ats_volcano_map = ats_ratio_df[ats_ratio_df.is_volcano.astype('bool') &
ats_ratio_df.is_flare][['lats_arcmin', 'lons_arcmin']]
sls_volcano_map = sls_ratio_df[sls_ratio_df.is_volcano.astype('bool') &
sls_ratio_df.is_flare][['lats_arcmin', 'lons_arcmin']]
In [43]:
ats_ratio_df.is_flare.loc[ats_ratio_df.is_volcano] = 0
sls_ratio_df.is_flare.loc[sls_ratio_df.is_volcano] = 0
In [44]:
sub_df = ats_hotspot_df.merge(ats_volcano_map, on=['lats_arcmin', 'lons_arcmin'])
print 'AT1', 'Volcanoes', sub_df[sub_df.sensor == 'at1'].groupby(['lats_arcmin', 'lons_arcmin']).agg({'sample_counts_flare': np.sum}).shape[0]
print 'AT2', 'Volcanoes', sub_df[sub_df.sensor == 'at2'].groupby(['lats_arcmin', 'lons_arcmin']).agg({'sample_counts_flare': np.sum}).shape[0]
print 'ATS', 'Volcanoes', sub_df[sub_df.sensor == 'ats'].groupby(['lats_arcmin', 'lons_arcmin']).agg({'sample_counts_flare': np.sum}).shape[0]
print
sub_df = sls_hotspot_df.merge(sls_volcano_map, on=['lats_arcmin', 'lons_arcmin'])
print 'SLSTR', 'Volcanoes', sub_df[sub_df.sensor == 'sls'].groupby(['lats_arcmin', 'lons_arcmin']).agg({'sample_counts_flare': np.sum}).shape[0]
In [45]:
def plot_data_screening(ratio_df, lab, fname, leg=True):
flare_df = ratio_df[ratio_df.is_flare.astype('bool')]
not_flare_df = ratio_df[~ratio_df.is_flare.astype('bool')]
volc_df = ratio_df[ratio_df.is_volcano]
plt.close('all')
fig = plt.figure(figsize=(15,10))
ax_list = []
ax_list.append(plt.subplot2grid((4, 5), (0, 0), colspan=5, rowspan=3, projection=ccrs.PlateCarree()))
ax_list.append(plt.subplot2grid((4, 5), (3, 0), projection=ccrs.PlateCarree()))
ax_list.append(plt.subplot2grid((4, 5), (3, 1), projection=ccrs.PlateCarree()))
ax_list.append(plt.subplot2grid((4, 5), (3, 2), projection=ccrs.PlateCarree()))
ax_list.append(plt.subplot2grid((4, 5), (3, 3), projection=ccrs.PlateCarree()))
ax_list.append(plt.subplot2grid((4, 5), (3, 4), projection=ccrs.PlateCarree()))
for ax in ax_list:
ax.set_xticks([])
ax.set_yticks([])
# set limits
xlims = [(-180, 180), (-105, -87), (4, 9), (46, 56), (65, 82), (106, 125)]
ylims = [(-90, 90), (25, 39), (3, 7), (23, 33), (55, 68), (33, 45)]
pos = [(-102, 40), (-2, -1), (39, 26), (83, 62), (113, 47)]
for ax, xlim, ylim in zip(ax_list, xlims, ylims):
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.coastlines()
# scale the values
flare_counts = flare_df['sample_counts_flare']
ind_counts = ratio_df['sample_counts_flare']
# make main plot
ax_list[0].text(0.94, 0.92, lab, transform=ax_list[0].transAxes, fontsize=25)
ax_list[0].plot(volc_df.lons, volc_df.lats, '.', color='gray', markersize=10, label='Volcanic')
scat = ax_list[0].scatter(flare_df.lons, flare_df.lats,
s=flare_counts/10, lw=0.2,
edgecolors='r',
facecolor='None',
label = 'Flare')
scat = ax_list[0].scatter(not_flare_df.lons, not_flare_df.lats,
s=ind_counts/10, lw=0.2,
edgecolors='b',
facecolor='None',
label = "Non-Flare")
leg0 = ax_list[0].legend(loc = 2, scatterpoints = 1, prop={'size': 13})
leg0.legendHandles[0]._sizes = [100]
leg0.legendHandles[1]._sizes = [100]
leg0.legendHandles[2]._sizes = [100]
leg0.get_frame().set_alpha(1)
if leg:
l1 = ax_list[0].scatter([-170],[-70], s=10/10, edgecolors='k', facecolor='None')
l2 = ax_list[0].scatter([-170],[-70], s=100/10, edgecolors='k', facecolor='None')
l3 = ax_list[0].scatter([-170],[-70], s=500/10, edgecolors='k', facecolor='None')
l4 = ax_list[0].scatter([-170],[-70], s=1000/10, edgecolors='k', facecolor='None')
labels = ["10", "100", "500", "1000"]
leg1 = ax_list[0].legend([l1, l2, l3, l4], labels, frameon=True, fontsize=14,
handlelength=2, loc = 3, borderpad = 1,
handletextpad=1, title='N. Samples', scatterpoints = 1, prop={'size': 15})
leg1.get_frame().set_alpha(1)
ax_list[0].add_artist(leg0)
# add roi boxes
for i, (p, x, y) in enumerate(zip(pos, xlims[1:], ylims[1:])):
ax_list[0].plot([x[0], x[0], x[1], x[1], x[0]],
[y[0], y[1], y[1], y[0], y[0]], 'k-')
ax_list[0].text(p[0], p[1], str(i+1)+'.', fontsize=12)
# make alt plots
for i, ax in enumerate(ax_list[1:]):
scat = ax.scatter(flare_df.lons, flare_df.lats,
s=flare_counts/10, lw=0.2,
edgecolors='r',
facecolor='None')
scat = ax.scatter(not_flare_df.lons, not_flare_df.lats,
s=ind_counts/10, lw=0.2,
edgecolors='b',
facecolor='None')
ax.text(0.1, 0.1, str(i+1)+'.', transform=ax.transAxes, fontsize=15)
plt.savefig(os.path.join(output_path, fname), bbox_inches='tight', dpi=600)
plt.show()
In [895]:
from sklearn.neighbors.kde import KernelDensity
from matplotlib.colors import BoundaryNorm
def estimate_density(df):
lat_vals = df.lats.values
lon_vals = df.lons.values
counts = df.sample_counts_flare.values
# removed the code to calculate total counts in
# the data; now we are just getting the density of the
# total number of sites
# long_lat_list = []
# long_lon_list = []
# for lat, lon, c in zip(lat_vals, lon_vals, counts):
# long_lat_list.extend([lat] * c)
# long_lon_list.extend([lon] * c)
# latlon = np.vstack([long_lat_list, long_lon_list]).T
latlon = np.vstack([lat_vals, lon_vals]).T
x = np.arange(-180, 181, 1)
y = np.arange(-90, 91, 1)
X, Y = np.meshgrid(x, y)
xy = np.radians(np.vstack([Y.ravel(), X.ravel()]).T)
# construct a spherical kernel density estimate of the distribution
kde = KernelDensity(bandwidth=0.005, metric='haversine',
kernel='tophat', algorithm='ball_tree')
kde.fit(np.radians(latlon))
Z = np.exp(kde.score_samples(xy))
Z = Z.reshape(X.shape)
Z = np.ma.masked_array(Z, Z == 0)  # mask cells with no density
return X, Y, Z
def hist_count(df):
bin_x = np.arange(-180, 181, 1)
bin_y = np.arange(-90, 91, 1)
meshed_x, meshed_y = np.meshgrid(bin_x, bin_y)
meshed_x = meshed_x[:-1,:-1] + 0.5
meshed_y = meshed_y[:-1,:-1] + 0.5
binned_data = stats.binned_statistic_2d(df.lons, df.lats,
np.ones(df.lons.size), 'sum',
bins=[bin_x, bin_y]).statistic.T
mask = binned_data == 0
Z = np.ma.masked_array(binned_data, mask)
return meshed_x, meshed_y, Z
def plot_data_screening_v3(df1, df2, df3):
plt.close('all')
flare_df1 = df1[df1.is_flare.astype('bool')]
not_flare_df1 = df1[~df1.is_flare.astype('bool')]
flare_df2 = df2[df2.is_flare.astype('bool')]
not_flare_df2 = df2[~df2.is_flare.astype('bool')]
flare_df3 = df3[df3.is_flare.astype('bool')]
not_flare_df3 = df3[~df3.is_flare.astype('bool')]
XT1, YT1, ZT1 = hist_count(flare_df1)
XF1, YF1, ZF1 = hist_count(not_flare_df1)
# mask each binned array so only dominant class is showing
mask = ZT1 > ZF1
ZT1_mask = np.ma.masked_array(ZT1, ~mask)
mask = ZF1 > ZT1
ZF1_mask = np.ma.masked_array(ZF1, ~mask)
XT2, YT2, ZT2 = hist_count(flare_df2)
XF2, YF2, ZF2 = hist_count(not_flare_df2)
# mask each binned array so only dominant class is showing
mask = ZT2 > ZF2
ZT2_mask = np.ma.masked_array(ZT2, ~mask)
mask = ZF2 > ZT2
ZF2_mask = np.ma.masked_array(ZF2, ~mask)
XT3, YT3, ZT3 = hist_count(flare_df3)
XF3, YF3, ZF3 = hist_count(not_flare_df3)
# mask each binned array so only dominant class is showing
mask = ZT3 > ZF3
ZT3_mask = np.ma.masked_array(ZT3, ~mask)
mask = ZF3 > ZT3
ZF3_mask = np.ma.masked_array(ZF3, ~mask)
# set up figure
plt.close('all')
fig = plt.figure(figsize=(13,20))
fig.subplots_adjust(hspace=0.001, wspace=0.1)
ax1 = plt.subplot(3, 1, 1, projection=ccrs.EqualEarth())
ax2 = plt.subplot(3, 1, 2, projection=ccrs.EqualEarth())
ax3 = plt.subplot(3, 1, 3, projection=ccrs.EqualEarth())
ax_list = [ax1, ax2, ax3]
for i, ax in enumerate(ax_list):
ax.set_global()
ax.outline_patch.set_edgecolor('black')
ax.coastlines(color='black', linewidth=0.5)
ax.gridlines(color='black', linewidth=0.5, alpha=0.5)
ax1.text(0.94, 0.92, '(a)', transform=ax1.transAxes, fontsize=25)
ax2.text(0.94, 0.92, '(b)', transform=ax2.transAxes, fontsize=25)
ax3.text(0.94, 0.92, '(c)', transform=ax3.transAxes, fontsize=25)
img_extent = (-180, 180, -90, 90)
im1T = ax1.imshow(ZT1_mask, origin='lower', cmap='Reds', norm=LogNorm(vmin=1, vmax=500),
extent=img_extent, transform=ccrs.PlateCarree())
im1F = ax1.imshow(ZF1_mask, origin='lower', cmap='Blues', norm=LogNorm(vmin=1, vmax=100),
extent=img_extent, transform=ccrs.PlateCarree())
im2T = ax2.imshow(ZT2_mask, origin='lower', cmap='Reds',norm=LogNorm(vmin=1, vmax=500),
extent=img_extent, transform=ccrs.PlateCarree())
im2F = ax2.imshow(ZF2_mask, origin='lower', cmap='Blues', norm=LogNorm(vmin=1, vmax=100),
extent=img_extent, transform=ccrs.PlateCarree())
im3T = ax3.imshow(ZT3_mask, origin='lower', cmap='Reds', norm=LogNorm(vmin=1, vmax=500),
extent=img_extent, transform=ccrs.PlateCarree())
im3F = ax3.imshow(ZF3_mask, origin='lower', cmap='Blues', norm=LogNorm(vmin=1, vmax=100),
extent=img_extent, transform=ccrs.PlateCarree())
cbaxes1 = fig.add_axes([0.125, 0.1, 0.35, 0.01])
cb1 = plt.colorbar(im1T, cax=cbaxes1, orientation="horizontal")
cb1.ax.tick_params(labelsize=12)
cb1.set_label('Flare Anomaly Count (Log)', fontsize=16)
cbaxes2 = fig.add_axes([0.55, 0.1, 0.35, 0.01])
cb2 = plt.colorbar(im1F, cax = cbaxes2, orientation="horizontal")
cb2.ax.tick_params(labelsize=12)
cb2.set_label('Non-Flare Anomaly Count (Log)', fontsize=16)
fname = 'density_plots.png'
plt.savefig(os.path.join(output_path, fname), bbox_inches='tight', dpi=600)
plt.show()
In [562]:
ats_flaring_map = ats_ratio_df[ats_ratio_df.is_flare.astype('bool')][['lats_arcmin', 'lons_arcmin']]
sls_flaring_map = sls_ratio_df[sls_ratio_df.is_flare.astype('bool')][['lats_arcmin', 'lons_arcmin']]
In [563]:
ats_flare_sampling_df = ats_hotspot_sampling_df.merge(ats_flaring_map, on=['lats_arcmin', 'lons_arcmin'])
sls_flare_sampling_df = sls_hotspot_sampling_df.merge(sls_flaring_map, on=['lats_arcmin', 'lons_arcmin'])
In [564]:
ats_flare_df = ats_hotspot_df.merge(ats_flaring_map, on=['lats_arcmin', 'lons_arcmin'])
sls_flare_df = sls_hotspot_df.merge(sls_flaring_map, on=['lats_arcmin', 'lons_arcmin'])
In [565]:
# set up the slstr annum dataset used in some of the analyses
sls_flare_df_annum = sls_flare_df[((sls_flare_df.year == 2017) & (sls_flare_df.month >= 5)) | (sls_flare_df.year == 2018)]
sls_flare_sampling_df_annum = sls_flare_sampling_df[((sls_flare_sampling_df.year == 2017) & (sls_flare_sampling_df.month >= 5)) | (sls_flare_sampling_df.year == 2018)]
There is irregular behaviour in the AT1 hotspot data in the Northern Hemisphere in some years - for evidence of this see the gif below. During 1994 and 1995 there are odd high FRP detections over much of the Northern Hemisphere, leading to erroneously high FRPs being reported for the US, UK, Canada and Russia. These values need to be excluded. Examining the statistics of the dataset below, it is clear that the tails of the AT1 statistical distributions reach much higher values than those of ATS. Looking at the maximum mean value for any individual ATS gas flare, it is around 80 MW, whilst for AT1 these maximum mean values go up to 670 MW. Looking at the distribution of the standard deviation, the AT1 and ATS data have similar ranges, but a larger proportion of the AT1 data is located in the tail. For ATS, over ???% of the data has standard deviation values less than ???, and using this information we can threshold the AT1 data so that the flaring activity is consistent with that seen by ATS. We can also apply the same approach to AT2 to remove any flaring activity that is not consistent with ATS.
By setting an upper limit on the standard deviation of ???, which comprises ???% of the ATS data, and an upper limit of 80 MW on the mean, which contains 100% of the AT2 and ATS data, we can exclude the AT1 thermal anomalies whose behaviour is inconsistent with that seen in AT2 and ATS (a sketch of how these percentages can be checked is given after the per-flare statistics are computed below).
In [566]:
print ats_flare_df.drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
print ats_flare_df[ats_flare_df.sensor == 'ats'].drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
print ats_flare_df[ats_flare_df.sensor == 'at2'].drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
print ats_flare_df[ats_flare_df.sensor == 'at1'].drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
print sls_flare_df.drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
In [582]:
def errors_plot(at1_stats_df, at2_stats_df, ats_stats_df, sls_stats_df):
fig = plt.figure(figsize=(18, 7))
ax0 = plt.subplot(121)
ax1 = plt.subplot(122)
labels = ['SLSTR Mean', 'AATSR Mean', 'ATSR-2 Mean', 'ATSR-1 Mean',
'SLSTR SD', 'AATSR SD', 'ATSR-2 SD', 'ATSR-1 SD']
no_labels = ['','','','','','','','']
data = [sls_stats_df.frp_mean, ats_stats_df.frp_mean, at2_stats_df.frp_mean, at1_stats_df.frp_mean,
sls_stats_df.frp_sd[sls_stats_df.frp_sd.notnull()], ats_stats_df.frp_sd[ats_stats_df.frp_sd.notnull()],
at2_stats_df.frp_sd[at2_stats_df.frp_sd.notnull()], at1_stats_df.frp_sd[at1_stats_df.frp_sd.notnull()]]
flierprops = dict(marker='o', markerfacecolor='red', markersize=5,
linestyle='none')
#plt.yscale('log')
ax0.set_xlim([1e-4, 1e3])
ax0.set_xscale("log", nonposx='clip')
ax0.boxplot(data, vert=False, flierprops=flierprops, labels=labels, whis=[0.05, 99.95])
ax0.set_xlabel('Log FRP (MW)',fontsize=20)
ax0.tick_params(axis='both', which='major', labelsize=14)
# values from ATS data, Punta de Mata flare in Venezuela
max_mean = 70.704
max_sd = 93.158
at1_mask = (at1_stats_df.frp_mean < max_mean) & (at1_stats_df.frp_sd < max_sd)
at2_mask = (at2_stats_df.frp_mean < max_mean) & (at2_stats_df.frp_sd < max_sd)
ats_mask = (ats_stats_df.frp_mean < max_mean) & (ats_stats_df.frp_sd < max_sd)
sls_mask = (sls_stats_df.frp_mean < max_mean) & (sls_stats_df.frp_sd < max_sd)
data = [sls_stats_df.frp_mean[sls_mask], ats_stats_df.frp_mean[ats_mask], at2_stats_df.frp_mean[at2_mask], at1_stats_df.frp_mean[at1_mask],
sls_stats_df.frp_sd[sls_mask], ats_stats_df.frp_sd[ats_mask], at2_stats_df.frp_sd[at2_mask], at1_stats_df.frp_sd[at1_mask]]
flierprops = dict(marker='o', markerfacecolor='red', markersize=5,
linestyle='none')
ax1.set_xlim([1e-4, 1e3])
ax1.set_xscale("log", nonposx='clip')
ax1.boxplot(data, vert=False, flierprops=flierprops, labels=no_labels, whis=[0.05, 99.95])
#ax1.tick_params(labelleft='off')
ax1.set_xlabel('Log FRP (MW)', fontsize=20)
ax1.tick_params(axis='both', which='major', labelsize=14)
ax0.text(0.05, 0.05, '(a)', transform=ax0.transAxes, fontsize=18)
ax1.text(0.05, 0.05, '(b)', transform=ax1.transAxes, fontsize=18)
plt.savefig(os.path.join(output_path, 'errors_plot.png'), bbox_inches='tight', dpi=600)
plt.show()
In [568]:
def statistical_screening_plot(df, x, y, inner=True):
plt.close('all')
fig, ax1 = plt.subplots()
h = ax1.hist2d(df[x], df[y], bins=50, cmin=1, norm=colors.LogNorm(), vmin=1, vmax=1000)
cbar = plt.colorbar(h[3], ax=ax1)
if inner:
# These are in unitless percentages of the figure size. (0,0 is bottom left)
left, bottom, width, height = [0.4, 0.4, 0.4, 0.4]
sub_df = df[df[y] < 10]
ax2 = fig.add_axes([left, bottom, width, height])
ax2.hist2d(sub_df[x], sub_df[y], bins=20, cmin=1, norm=colors.LogNorm())
ax1.set_ylabel(y)
ax1.set_xlabel(x)
plt.show()
In [569]:
def generate_stats_df(flare_df):
cols = ['lats_arcmin', 'lons_arcmin', 'lats', 'lons', 'frp', 'year', 'sample_counts_flare']
stats_df = flare_df[cols].copy()
stats_df['frp_sd'] = stats_df['frp']
stats_df['frp_mean'] = stats_df['frp']
stats_df['frp_max'] = stats_df['frp']
stats_df['frp_min'] = stats_df['frp']
stats_df['frp_median'] = stats_df['frp']
stats_df['frp_sum'] = stats_df['frp']
to_group = ['lats_arcmin', 'lons_arcmin']
to_agg = {'frp_sd': np.std,
'frp_mean': np.mean,
'frp_min': np.min,
'frp_max': np.max,
'frp_median': np.median,
'sample_counts_flare': np.sum,
'frp_sum': np.sum,
'lats': np.mean,
'lons': np.mean,
}
stats_df = stats_df.groupby(to_group, as_index=False).agg(to_agg)
return stats_df
In [570]:
at1_stats_df = generate_stats_df(ats_flare_df[ats_flare_df.sensor == 'at1'])
at2_stats_df = generate_stats_df(ats_flare_df[ats_flare_df.sensor == 'at2'])
ats_stats_df = generate_stats_df(ats_flare_df[ats_flare_df.sensor == 'ats'])
sls_stats_df = generate_stats_df(sls_flare_df[sls_flare_df.sensor == 'sls'])
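As a cross-check on the screening limits discussed above (a sketch only; the 70.704 MW mean and 93.158 MW standard deviation limits are the ATS-derived values used in the screening cell further below), the fraction of each sensor's flares retained by those limits can be computed directly from the per-flare stats dataframes:
In [ ]:
# sketch: fraction of flares in each per-sensor stats dataframe falling below
# the ATS-derived screening limits (mean FRP and FRP standard deviation)
max_mean = 70.704  # MW
max_sd = 93.158    # MW
for name, sdf in zip(['at1', 'at2', 'ats', 'sls'],
                     [at1_stats_df, at2_stats_df, ats_stats_df, sls_stats_df]):
    retained = ((sdf.frp_mean < max_mean) & (sdf.frp_sd < max_sd)).mean() * 100
    print name, 'retained (%):', round(retained, 1)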
In [583]:
plt.close('all')
errors_plot(at1_stats_df, at2_stats_df, ats_stats_df, sls_stats_df)
In [318]:
# set max values from the Punta de Mata flare, Venezuela
max_mean = 70.705
max_sd = 93.16
# do ats
aats_flare_df = ats_flare_df[ats_flare_df.sensor == 'ats']
aats_flare_df = aats_flare_df.merge(ats_stats_df[['frp_mean', 'frp_sd', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])
s1 = aats_flare_df.shape[0]
print s1
excluded_df = aats_flare_df[~((aats_flare_df.frp_mean < max_mean) & (aats_flare_df.frp_sd < max_sd))]
print 'total excluded ATS:', excluded_df.drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
aats_flare_df = aats_flare_df[(aats_flare_df.frp_mean < max_mean) &
(aats_flare_df.frp_sd < max_sd)]
aats_flare_df.drop(['frp_mean', 'frp_sd'], axis=1, inplace=True)
s2 = aats_flare_df.shape[0]
print s2
print s2 * 1.0 / s1 * 100
print
# do at2
at2_flare_df = ats_flare_df[ats_flare_df.sensor == 'at2']
at2_flare_df = at2_flare_df.merge(at2_stats_df[['frp_mean', 'frp_sd', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])
s1 = at2_flare_df.shape[0]
print s1
excluded_df = at2_flare_df[~((at2_flare_df.frp_mean < max_mean) & (at2_flare_df.frp_sd < max_sd))]
print 'total excluded AT2:', excluded_df.drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
at2_flare_df = at2_flare_df[(at2_flare_df.frp_mean < max_mean) &
(at2_flare_df.frp_sd < max_sd)]
at2_flare_df.drop(['frp_mean', 'frp_sd'], axis=1, inplace=True)
s2 = at2_flare_df.shape[0]
print s2
print s2 * 1.0 / s1 * 100
print
# do at1
at1_flare_df = ats_flare_df[ats_flare_df.sensor == 'at1']
at1_flare_df = at1_flare_df.merge(at1_stats_df[['frp_mean', 'frp_sd', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])
s1 = at1_flare_df.shape[0]
print s1
excluded_df = at1_flare_df[~((at1_flare_df.frp_mean < max_mean) & (at1_flare_df.frp_sd < max_sd))]
print 'total excluded AT1:', excluded_df.drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
at1_flare_df = at1_flare_df[(at1_flare_df.frp_mean < max_mean) &
(at1_flare_df.frp_sd < max_sd)]
at1_flare_df.drop(['frp_mean', 'frp_sd'], axis=1, inplace=True)
s2 = at1_flare_df.shape[0]
print s2
print s2 * 1.0 / s1 * 100
print
# join back together
ats_flare_df = pd.concat([aats_flare_df, at2_flare_df, at1_flare_df], ignore_index=True)
Let's check the flare counts again to see if the strange AT1 data has been removed:
In [319]:
print ats_flare_df.drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
print ats_flare_df[ats_flare_df.sensor == 'ats'].drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
print ats_flare_df[ats_flare_df.sensor == 'at2'].drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
print ats_flare_df[ats_flare_df.sensor == 'at1'].drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
print sls_flare_df.drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
In [743]:
ats_flare_df.frp.describe()
Out[743]:
In [742]:
ats_flare_df[ats_flare_df.frp == 0.349345900631]
Out[742]:
Now let's redo the sampling analysis to consider only the persistent hotspots associated with flaring; we do not care about the sampling of non-flare hotspots.
In [320]:
# reduce sampling dataframes to flaring hotspot locations only
ats_flare_sampling_df = ats_hotspot_sampling_df.merge(ats_flare_df[['lats_arcmin', 'lons_arcmin']].drop_duplicates(),
on=['lats_arcmin', 'lons_arcmin'])
In [321]:
sls_flare_sampling_df = sls_hotspot_sampling_df.merge(sls_flare_df[['lats_arcmin', 'lons_arcmin']].drop_duplicates(),
on=['lats_arcmin', 'lons_arcmin'])
Looking at the sampling dataframe first, it can be seen that the SLSTR data has too low a cloud cover for 2017; the cause is currently unknown. The same is found for AT1, and that can be corrected using the data from ATS and AT2, as all sensors use the same map, so the sampling is made across all sensors at the given locations. For SLSTR we cannot so easily use the ATX sampling to adjust, as the flaring map is different, so not all the points observed by SLSTR are available in the ATX maps. This suggests that we should use the SLSTR map to generate the sampling data from ATSR, as we could then use the cloud cover statistics from these ATSR data to update the data for SLSTR. As a workaround, we can instead use cloud statistics from the nearest ATX sample to update the cloud cover for SLSTR, which does need to be updated to get reliable estimates of flaring activity in terms of FRP.
In [322]:
global_sampling = sls_flare_sampling_df.groupby(['year', 'sensor'], as_index=False).agg({'sample_counts_all': np.sum,
'cloud_cover': np.mean})
global_sampling
Out[322]:
We repeat the analysis done above for the SLSTR data on the ATX data. It is apparent that the annual mean cloud cover for AT1 is too low: it should be around 60% and is typically far lower than this, owing to deficiencies in the cloud masking available on AT1. So we need to update the sampling cloud cover values for AT1 in an appropriate way so that they reflect typical cloud cover for a given feature. Also note that the total sample count in some years is too low (considering only full years of operation). This is indicative of poor sensor behaviour in those years, and must be mentioned in the paper.
In [323]:
# get the global annual sample counts and mean cloud cover
global_sampling = ats_flare_sampling_df.groupby(['year', 'sensor'], as_index=False).agg({'sample_counts_all': np.sum,
'cloud_cover': np.mean})
global_sampling
Out[323]:
To update the AT1 cloud cover percentages, the non-AT1 samples are first extracted into a new dataframe. These non-AT1 samples are then grouped by lat and lon, taking the mean cloud cover for each sample location. A new sensor column is added to the grouped data with the sensor assigned as AT1, and the cloud cover column is renamed updated_cloud_cover. The modified grouped dataframe is then merged back onto the main sampling dataframe by lat, lon and sensor. All AT1 rows now have an updated cloud cover value associated with them, and this value is used to overwrite the original cloud cover column. In effect, the cloud cover column in every AT1 row is assigned the updated cloud cover value. Lastly, the updated_cloud_cover column is dropped from the sampling dataframe as it is no longer needed.
In [324]:
def update_at1_cloud_cover(ats_sampling_df):
not_at1_sampling = ats_sampling_df[ats_sampling_df.sensor !='at1'].copy()
not_at1_sampling_mean_cc = not_at1_sampling.groupby(['lats_arcmin', 'lons_arcmin'], as_index=False).agg({'cloud_cover':np.mean})
not_at1_sampling_mean_cc['sensor'] = 'at1'
not_at1_sampling_mean_cc.rename(columns={'cloud_cover':'updated_cloud_cover'}, inplace=True)
ats_sampling_df = ats_sampling_df.merge(not_at1_sampling_mean_cc, on=['lats_arcmin', 'lons_arcmin', 'sensor'], how='outer')
print 'Number of at1 sampling locations not updated:', np.sum((ats_sampling_df.sensor=='at1') & (~ats_sampling_df.updated_cloud_cover.notnull()))
mask = ats_sampling_df.updated_cloud_cover.notnull()
ats_sampling_df.cloud_cover.loc[mask] = ats_sampling_df.updated_cloud_cover.loc[mask]
ats_sampling_df.drop('updated_cloud_cover', axis=1, inplace=True)
return ats_sampling_df
In [325]:
ats_flare_sampling_df = update_at1_cloud_cover(ats_flare_sampling_df)
Checking the sampling data again, we can see that the AT1 cloud cover statistics are now more representative of those across the AT2 and ATS data. So the values at each individual AT1 flare will now be much more realistic.
In [326]:
# get the global annual sample counts and mean cloud cover
global_sampling = ats_flare_sampling_df.groupby(['year', 'sensor'], as_index=False).agg({'sample_counts_all': np.sum,
'cloud_cover': np.mean})
global_sampling
Out[326]:
In the case of SLSTR we do not have ATX samples available for all SLSTR hotspot sampling locations. So instead we locate the approximate nearest hotspot using a Euclidean distance metric and assign each SLSTR sampling grid cell the cloud cover from the ATX sampling dataframe. We do not need to be exact here as cloud cover regimes are consistent over hundreds of kilometres.
In [327]:
def update_slstr_cloud_cover(ats_sampling_df, sls_sampling_df):
# group ATS data to unique sample locations and get cc and loc
ats_sampling_mean_cc = ats_sampling_df.groupby(['lats_arcmin', 'lons_arcmin'], as_index=False).agg({'cloud_cover':np.mean})
ats_cloud_cover = ats_sampling_mean_cc.cloud_cover.values
ats_lats = ats_sampling_mean_cc.lats_arcmin.values
ats_lons = ats_sampling_mean_cc.lons_arcmin.values
# group sls data to unique sample locations
sls_sampling_mean_cc = sls_sampling_df.groupby(['lats_arcmin', 'lons_arcmin'], as_index=False).agg({'cloud_cover':np.mean})
sls_sampling_mean_cc.drop('cloud_cover', axis=1, inplace=True)
sls_lats = sls_sampling_mean_cc.lats_arcmin.values
sls_lons = sls_sampling_mean_cc.lons_arcmin.values
ats_lat_lon = np.dstack([np.deg2rad(ats_lats),
np.deg2rad(ats_lons)])[0]
ats_balltree = BallTree(ats_lat_lon, metric='euclidean')
# get the unique flare lats and lons for assessment in kdtree
sls_locations = np.array(zip(np.deg2rad(sls_lats),
np.deg2rad(sls_lons)))
# compare the flare locations to the potential locations in the orbit
distances, indexes = ats_balltree.query(sls_locations, k=1)
# get updated cc and append to df
updated_cc = ats_cloud_cover[indexes]
sls_sampling_mean_cc['cloud_cover'] = updated_cc
# merge updated grouped data onto sls dataframe
sls_sampling_df.drop('cloud_cover', axis=1, inplace=True)
sls_sampling_df = sls_sampling_df.merge(sls_sampling_mean_cc, on=['lats_arcmin', 'lons_arcmin'], how='outer')
return sls_sampling_df
In [328]:
sls_flare_sampling_df = update_slstr_cloud_cover(ats_flare_sampling_df, sls_flare_sampling_df.copy())
In [329]:
# get the global annual sample counts and mean cloud cover
global_sampling = sls_flare_sampling_df.groupby(['year', 'sensor'], as_index=False).agg({'sample_counts_all': np.sum,
'cloud_cover': np.mean})
global_sampling
Out[329]:
In [330]:
print 'n persistent ats:', ats_flare_sampling_df.drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
gp = ats_flare_sampling_df.groupby(['lats_arcmin', 'lons_arcmin']).agg({'sample_counts_all':np.sum})
print gp.sample_counts_all.mean()
print gp.sample_counts_all.std()
print 'n persistent sls:', sls_flare_sampling_df.drop_duplicates(['lats_arcmin', 'lons_arcmin']).shape[0]
gp = sls_flare_sampling_df.groupby(['lats_arcmin', 'lons_arcmin']).agg({'sample_counts_all':np.sum})
print gp.sample_counts_all.mean()
print gp.sample_counts_all.std()
In [331]:
def create_header(sensor, fname, a=True):
if sensor in ['at1', 'at2', 'ats']:
if a:
text = ["Conventions,G,BADC-CSV,1,",
"title,G,ATSR-SLSTR Global Gas Flaring Activity Dataset,",
"creator,G,Daniel Fisher,King's College London,",
"activity,G,KCL Global Gas Flaring,",
"location,G,global,",
"null_value,G,-999,",
"last_revised_date,G,{0},".format(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
"comments,G,Data record of active thermal anomalies and associated fire radiative power,",
"comments,G,from the Along Track Scanning and Sea and Land Surface Temperature Radiometers,",
"long_name,lat_arcmin,latitude arcminute grid cell index for pixel,",
"long_name,lon_arcmin,longitude arcminute grid cell index for pixel,",
"long_name,bg_mwir_rad_3_7,mean background 3.7 um spectral radiance,W m^-2 sr^-1 um^-1,",
"long_name,pixel_area,thermal anomaly pixel area,m^2,",
"long_name,lat,thermal anomaly containing pixel latitude,degrees,",
"coordinate_variable,lat,y,",
"long_name,swir_rad_1_6,thermal anomaly containing pixel 1.6 um spectral radiance,W m^-2 sr^-1 um^-1,",
"long_name,frp,thermal anomaly containing pixel fire radiative power,W m^-2,",
"long_name,mwir_rad_3_7,thermal anomaly containing pixel 3.7 um spectral radiance,W m^-2 sr^-1 um^-1,",
"long_name,lon,thermal anomaly containing pixel longitude,degrees,",
"coordinate_variable,lon,x,",
"long_name,year,year of satellite overpass,",
"long_name,month,month of satellite overpass,",
"long_name,day,day of satellite overpass,",
"long_name,hhmm,hour and minute of satellite overpass,",
"long_name,sensor,satellite abbreviation,",
"data,",
]
else:
text = ["Conventions,G,BADC-CSV,1,",
"title,G,ATSR-SLSTR Global Gas Flaring Sampling Dataset,",
"creator,G,Daniel Fisher, King's College London,",
"activity,G,KCL Global Gas Flaring,",
"location,G,global,",
"null_value,G,-999,",
"last_revised_date,G,{0},".format(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
"comments,G,Data record of samples of grid cells known to contain thermal anomalies,",
"comments,G,and measures of cloud cover in proximity (±8 pixels) to the anomaly location,",
"comments,G,from the Along Track Scanning and Sea and Land Surface Temperature Radiometers.,",
"comments,G,Only for use in conjunction with the activity dataset.,",
"long_name,lat_arcmin,latitude arcminute grid cell index, ",
"coordinate_variable,lat_arcmin,y,",
"long_name,lon_arcmin,longitude arcminute grid cell index,",
"coordinate_variable,lon_arcmin,x,",
"long_name,cloud_cover,mean fractional cloud cover for grid cell for overpass,",
"long_name,year,year of satellite overpass,",
"long_name,month,month of satellite overpass,",
"long_name,day,day of satellite overpass,",
"long_name,hhmm,hour and minute of satellite overpass,",
"long_name,sensor,satellite abbreviation,",
"data,",
]
else:
if a:
text = ["Conventions,G,BADC-CSV,1,",
"title,G,ATSR-SLSTR Global Gas Flaring Activity Dataset,",
"creator,G,Daniel Fisher,King's College London,",
"activity,G,KCL Global Gas Flaring,",
"location,G,global,",
"null_value,G,-999,",
"last_revised_date,G,{0},".format(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
"comments,G,Data record of pixels with active thermal anomalies and associated fire radiative power,",
"comments,G,from the Along Track Scanning and Sea and Land Surface Temperature Radiometers,",
"long_name,lat_arcmin,latitude arcminute grid cell index for pixel,",
"long_name,lon_arcmin,longitude arcminute grid cell index for pixel,",
"long_name,swir_rad_2_2,thermal anomaly containing pixel 2.2 um spectral radiance,W m^-2 sr^-1 um^-1,",
"long_name,frp,thermal anomaly containing pixel fire radiative power,W m^-2,",
"long_name,lat,thermal anomaly containing pixel latitude,degrees,",
"coordinate_variable,lat,y,",
"long_name,swir_rad_1_6,thermal anomaly containing pixel 1.6 um spectral radiance,W m^-2 sr^-1 um^-1,",
"long_name,lon,thermal anomaly containing pixel longitude,degrees,",
"coordinate_variable,lon,x,",
"long_name,pixel_area,thermal anomaly pixel area,m^2,",
"long_name,year,year of satellite overpass,",
"long_name,month,month of satellite overpass,",
"long_name,day,day of satellite overpass,",
"long_name,hhmm,hour and minute of satellite overpass,",
"long_name,sensor,satellite abbreviation,",
"data,",
]
else:
text = ["Conventions,G,BADC-CSV,1,",
"title,G,ATSR-SLSTR Global Gas Flaring Sampling Dataset,",
"creator,G,Daniel Fisher, King's College London,",
"activity,G,KCL Global Gas Flaring,",
"location,G,global,",
"null_value,G,-999,",
"last_revised_date,G,{0},".format(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
"comments,G,Data record of samples of grid cells known to contain thermal anomalies,",
"comments,G,and measures of cloud cover in proximity (±8 pixels) to the anomaly location,",
"comments,G,from the Along Track Scanning and Sea and Land Surface Temperature Radiometers.,",
"comments,G,Only for use in conjunction with the activity dataset.,",
"long_name,lat_arcmin,latitude arcminute grid cell index,",
"coordinate_variable,lat_arcmin,y,",
"long_name,lon_arcmin,longitude arcminute grid cell index,",
"coordinate_variable,lon_arcmin,x,",
"long_name,cloud_cover,mean fractional cloud cover for grid cell for overpass,",
"long_name,year,year of satellite overpass,",
"long_name,month,month of satellite overpass,",
"long_name,day,day of satellite overpass,",
"long_name,hhmm,hour and minute of satellite overpass,",
"long_name,sensor,satellite abbreviation,",
"data,",
]
with open(os.path.join(output_path_ds, fname),'wb') as f:
for line in text:
f.write(line)
f.write('\n')
In [332]:
ats_flare_df.columns
Out[332]:
In [333]:
ats_flare_df_out = ats_flare_df.copy()
ats_flare_df_out.drop(ats_flare_df_out.columns[[0, 3, 4, 8, 9, -3, -2, -1]], axis=1, inplace=True)
ats_flare_df_out.head()
ats_flare_df_out.rename(columns={'lats_arcmin': 'lat_arcmin',
'lons_arcmin': 'lon_arcmin',
'bg_size_used': 'bg_win_size',
'inval_pixels_bg_pc': 'bg_invalid',
'lats': 'lat',
'lons': 'lon',
'hotspot_bg_pc': 'bg_hotspot',
'pixel_size': 'pixel_area',
'swir_radiances': 'swir_rad_1_6',
'mwir_radiances': 'mwir_rad_3_7',
'mwir_bg': 'bg_mwir_rad_3_7'}, inplace=True)
ats_flare_df_out.loc[ats_flare_df_out.bg_mwir_rad_3_7 == -1, 'bg_mwir_rad_3_7'] = -999
In [73]:
years = np.arange(1991, 2013)
months = np.arange(1,13)
for y in years:
y_f = str(y).zfill(2)
for m in months:
# subset data frame
ym_df = ats_flare_df_out[(ats_flare_df_out.month == m) & (ats_flare_df_out.year == y)]
if ym_df.empty:
continue
m_f = str(m).zfill(2)
# split by sensor
if 'at1' in ym_df.sensor.values:
# create csv
at1_fname = 'at1_global_{0}{1}01_ggf_activity_monthly_v1.csv'.format(y_f, m_f)
create_header('at1', at1_fname)
# append dataframe and footer to csv
at1_ym_df = ym_df[ym_df.sensor == 'at1']
with open(os.path.join(output_path_ds, at1_fname), 'a') as f:
at1_ym_df.to_csv(f, index=False)
f.write('end data,')
if 'at2' in ym_df.sensor.values:
# create csv
at2_fname = 'at2_global_{0}{1}01_ggf_activity_monthly_v1.csv'.format(y_f, m_f)
create_header('at2', at2_fname)
# append dataframe and footer to csv
at2_ym_df = ym_df[ym_df.sensor == 'at2']
with open(os.path.join(output_path_ds, at2_fname), 'a') as f:
at2_ym_df.to_csv(f, index=False)
f.write('end data,')
if 'ats' in ym_df.sensor.values:
# create csv
ats_fname = 'ats_global_{0}{1}01_ggf_activity_monthly_v1.csv'.format(y_f, m_f)
create_header('ats', ats_fname)
# append dataframe and footer to csv
ats_ym_df = ym_df[ym_df.sensor == 'ats']
with open(os.path.join(output_path_ds, ats_fname), 'a') as f:
ats_ym_df.to_csv(f, index=False)
f.write('end data,')
print 'done'
In [ ]:
ats_flare_sampling_df_out = ats_flare_sampling_df.copy()
ats_flare_sampling_df_out.drop(ats_flare_sampling_df_out.columns[[0, -1]], axis=1, inplace=True)
ats_flare_sampling_df_out.rename(columns={'lats_arcmin': 'lat_arcmin',
'lons_arcmin': 'lon_arcmin'}, inplace=True)
In [ ]:
years = np.arange(1991, 2013)
months = np.arange(1,13)
for y in years:
y_f = str(y).zfill(2)
for m in months:
# subset data frame
ym_df = ats_flare_sampling_df_out[(ats_flare_sampling_df_out.month == m) & (ats_flare_sampling_df_out.year == y)]
if ym_df.empty:
continue
m_f = str(m).zfill(2)
# split by sensor
if 'at1' in ym_df.sensor.values:
# create csv
at1_fname = 'at1_global_{0}{1}01_ggf_sampling_monthly_v1.csv'.format(y_f, m_f)
create_header('at1', at1_fname, a=False)
# append dataframe and footer to csv
at1_ym_df = ym_df[ym_df.sensor == 'at1']
with open(os.path.join(output_path_ds, at1_fname), 'a') as f:
at1_ym_df.to_csv(f, index=False)
f.write('end data,')
if 'at2' in ym_df.sensor.values:
# create csv
at2_fname = 'at2_global_{0}{1}01_ggf_sampling_monthly_v1.csv'.format(y_f, m_f)
create_header('at2', at2_fname, a=False)
# append dataframe and footer to csv
at2_ym_df = ym_df[ym_df.sensor == 'at2']
with open(os.path.join(output_path_ds, at2_fname), 'a') as f:
at2_ym_df.to_csv(f, index=False)
f.write('end data,')
if 'ats' in ym_df.sensor.values:
# create csv
ats_fname = 'ats_global_{0}{1}01_ggf_sampling_monthly_v1.csv'.format(y_f, m_f)
create_header('ats', ats_fname, a=False)
# append dataframe and footer to csv
ats_ym_df = ym_df[ym_df.sensor == 'ats']
with open(os.path.join(output_path_ds, ats_fname), 'a') as f:
ats_ym_df.to_csv(f, index=False)
f.write('end data,')
print 'done'
In [ ]:
sls_flare_df_out = sls_flare_df.copy()
sls_flare_df_out.drop(sls_flare_df_out.columns[[0, 6, 7, 9, -2, -1]], axis=1, inplace=True)
sls_flare_df_out.rename(columns={'lats_arcmin': 'lat_arcmin',
'lons_arcmin': 'lon_arcmin',
'lats': 'lat',
'lons': 'lon',
'pixel_size': 'pixel_area',
'swir_radiances': 'swir_rad_1_6',
'swir_radiances_22': 'swir_rad_2_2'}, inplace=True)
In [ ]:
years = np.arange(2017, 2019)
months = np.arange(1,13)
for y in years:
y_f = str(y).zfill(2)
for m in months:
# subset data frame
ym_df = sls_flare_df_out[(sls_flare_df_out.month == m) & (sls_flare_df_out.year == y)]
if ym_df.empty:
continue
m_f = str(m).zfill(2)
# split by sensor
if 'sls' in ym_df.sensor.values:
# create csv
sls_fname = 'sls_global_{0}{1}01_ggf_activity_monthly_v1.csv'.format(y_f, m_f)
create_header('sls', sls_fname)
# append dataframe and footer to csv
sls_ym_df = ym_df[ym_df.sensor == 'sls']
with open(os.path.join(output_path_ds, sls_fname), 'a') as f:
sls_ym_df.to_csv(f, index=False)
f.write('end data,')
In [ ]:
sls_flare_sampling_df_out = sls_flare_sampling_df.copy()
sls_flare_sampling_df_out.drop(sls_flare_sampling_df_out.columns[[0, -2]], axis=1, inplace=True)
sls_flare_sampling_df_out.rename(columns={'lats_arcmin': 'lat_arcmin',
'lons_arcmin': 'lon_arcmin',
}, inplace=True)
In [ ]:
years = np.arange(2017, 2019)
months = np.arange(1,13)
for y in years:
y_f = str(y).zfill(2)
for m in months:
# subset data frame
ym_df = sls_flare_sampling_df_out[(sls_flare_sampling_df_out.month == m) & (sls_flare_sampling_df_out.year == y)]
if ym_df.empty:
continue
m_f = str(m).zfill(2)
# split by sensor
if 'sls' in ym_df.sensor.values:
# create csv
sls_fname = 'sls_global_{0}{1}01_ggf_sampling_monthly_v1.csv'.format(y_f, m_f)
create_header('sls', sls_fname, a=False)
# append dataframe and footer to csv
sls_ym_df = ym_df[ym_df.sensor == 'sls']
with open(os.path.join(output_path_ds, sls_fname), 'a') as f:
sls_ym_df.to_csv(f, index=False)
f.write('end data,')
In [610]:
ats_hotspot_sampling_df.groupby(['year', 'month', 'day']).hhmm.nunique().reset_index()
Out[610]:
In [612]:
#ats_monthly_counts = ats_hotspot_sampling_df.groupby(['year', 'month', 'day'], as_index=False).agg({'sample_counts_all': np.sum})
ats_monthly_counts = ats_hotspot_sampling_df.groupby(['year', 'month', 'day']).hhmm.nunique().reset_index()
ats_monthly_counts['dt'] = pd.to_datetime(ats_monthly_counts.year*10000+
ats_monthly_counts.month*100+
ats_monthly_counts.day,format='%Y%m%d')
#sls_monthly_counts = sls_hotspot_sampling_df.groupby(['year', 'month', 'day'], as_index=False).agg({'sample_counts_all': np.sum})
sls_monthly_counts = sls_hotspot_sampling_df.groupby(['year', 'month', 'day']).hhmm.nunique().reset_index()
sls_monthly_counts['dt'] = pd.to_datetime(sls_monthly_counts.year*10000+
sls_monthly_counts.month*100+
sls_monthly_counts.day,format='%Y%m%d')
plt.figure(figsize=(20,5))
plt.plot(ats_monthly_counts.dt, ats_monthly_counts.hhmm, 'r.')
#plt.plot(sls_monthly_counts.dt, sls_monthly_counts.sample_counts_all, 'b.')
plt.show()
# plt.close('all')
# #plt.figure(figsize=(30,10))
# # Initialize the FacetGrid object
# pal = sns.cubehelix_palette(1, rot=-.25, light=.7)
# g = sns.FacetGrid(ats_monthly_counts, row="Year", hue="Year", aspect=15, height=0.8, palette=pal)
# g.map(sns.barplot, "Month", "sample_counts_all")
# #g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw=.2)
# #g.map(plt.axhline, y=0, lw=2, clip_on=False)
# # Define and use a simple function to label the plot in axes coordinates
# def label(x, color, label):
# ax = plt.gca()
# ax.text(0, .2, label, fontweight="bold", color='k',
# ha="left", va="center", transform=ax.transAxes)
# g.map(label, "Month")
# g.set_titles("")
# plt.savefig(os.path.join(output_path, 'monthly_sample_counts_ats.png'), dpi=600)
# plt.show()
In [74]:
ats_monthly_counts = ats_flare_df.groupby(['year', 'month'], as_index=False).agg({'sample_counts_flare': np.sum})
ats_monthly_counts['yearmonth'] = ats_monthly_counts.year*100 + ats_monthly_counts.month
ats_monthly_counts.set_index('yearmonth', inplace=True)
ats_monthly_counts.drop(['year', 'month'], axis=1, inplace=True)
In [75]:
year = (np.arange(1991,2013,1)*100).repeat(12)
month = np.tile(np.arange(1,13,1), 2013-1991)
idx = year + month
In [76]:
ats_monthly_counts = ats_monthly_counts.reindex(idx, fill_value=0)
ats_monthly_counts['Year'] = year / 100
ats_monthly_counts['Month'] = month
In [557]:
plt.close('all')
#plt.figure(figsize=(30,10))
# Initialize the FacetGrid object
pal = sns.cubehelix_palette(1, rot=-.25, light=.7)
g = sns.FacetGrid(ats_monthly_counts, row="Year", hue="Year", aspect=15, height=0.8, palette=pal)
g.map(sns.barplot, "Month", "sample_counts_flare")
#g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw=.2)
#g.map(plt.axhline, y=0, lw=2, clip_on=False)
# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
ax = plt.gca()
ax.text(0, .2, label, fontweight="bold", color='k',
ha="left", va="center", transform=ax.transAxes)
g.map(label, "Month")
g.set_titles("")
plt.savefig(os.path.join(output_path, 'monthly_flare_counts_ats.png'), dpi=600)
plt.show()
In [78]:
## ATS Flare persistency
In [79]:
args = ['lats_arcmin', 'lons_arcmin', 'year', 'month', 'day']
ats_flare_locations = ats_flare_df.drop_duplicates(args)[args]
ats_flare_locations['min_time'] = pd.to_datetime((ats_flare_locations.year*10000+ats_flare_locations.month*100+ats_flare_locations.day).apply(str),format='%Y%m%d')
ats_flare_locations['max_time'] = pd.to_datetime((ats_flare_locations.year*10000+ats_flare_locations.month*100+ats_flare_locations.day).apply(str),format='%Y%m%d')
ats_flare_locations = ats_flare_locations.groupby(['lats_arcmin', 'lons_arcmin'], as_index=False).agg({'min_time':np.min, 'max_time': np.max})
t_dff = ats_flare_locations.max_time - ats_flare_locations.min_time
In [80]:
t_dff.describe()
Out[80]:
First split the VIIRS hotspots into flaring and non-flaring hotspots:
In [81]:
viirs_industrial_df['is_flare'] = viirs_industrial_df.name_id == 4
viirs_industrial_df['sample_counts_flare'] = 10
viirs_industrial_df = dist_to_nearest_volcano(volc_df, viirs_industrial_df)
In [897]:
plot_data_screening_v3(ats_ratio_df, sls_ratio_df, viirs_industrial_df)
In [86]:
def within_dist(target_df, observed_df):
'''
The target dataframe contains the assumed TRUE flares. We compare all flares in
the observed dataframe to the flares in the target df, and if an observed flare
is within approximately 2 km (one arcminute grid cell, including diagonals) of any
target flare then it is flagged as a match (i.e. it observes a flare in the same location as the target df).
'''
target_lat_lon = np.dstack([target_df.lats_arcmin.values, target_df.lons_arcmin.values])[0]
target_balltree = BallTree(target_lat_lon, metric='euclidean')
# get the unique flare lats and lons for assessment in kdtree
observed_locations = np.array(zip(observed_df.lats_arcmin.values, observed_df.lons_arcmin.values))
# compare the flare locations to the potential locations in the orbit
distances, indexes = target_balltree.query(observed_locations, k=1)
# set up the dataframe to hold the distances
mask = distances < np.sqrt(2) # if within 1 grid cell (sqrt 2 includes corners)
observed_df['within_arcmin'] = mask
return observed_df
In [87]:
agg_dict = {'name_id': lambda x:stats.mode(x)[0][0],
'lats': np.mean,
'lons': np.mean}
viirs_unique_flares = viirs_industrial_df.groupby(['lats_arcmin', 'lons_arcmin'], as_index=False).agg(agg_dict)
viirs_unique_flares = viirs_unique_flares[viirs_unique_flares.name_id == 4]
viirs_unique_flares.drop('name_id', axis=1, inplace=True)
ats_unique_flares = ats_flare_df.drop_duplicates(['lats_arcmin', 'lons_arcmin'])[['lats_arcmin', 'lons_arcmin', 'lats','lons']]
sls_unique_flares = sls_flare_df.drop_duplicates(['lats_arcmin', 'lons_arcmin'])[['lats_arcmin', 'lons_arcmin', 'lats','lons']]
In [88]:
# adding country level data
ats_unique_flares = ats_unique_flares.merge(ats_countries_df[['countries', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])
sls_unique_flares = sls_unique_flares.merge(sls_countries_df[['countries', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])
viirs_unique_flares = viirs_unique_flares.merge(viirs_countries_df[['countries', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])
In [89]:
# within_dist(target_df, observed_df)
viirs_unique_flares_ats = within_dist(ats_unique_flares.copy(), viirs_unique_flares.copy())
viirs_unique_flares_sls = within_dist(sls_unique_flares.copy(), viirs_unique_flares.copy())
sls_unique_flares_viirs = within_dist(viirs_unique_flares.copy(), sls_unique_flares.copy())
sls_unique_flares_ats = within_dist(ats_unique_flares.copy(), sls_unique_flares.copy())
ats_unique_flares_viirs = within_dist(viirs_unique_flares.copy(), ats_unique_flares.copy())
ats_unique_flares_sls = within_dist(sls_unique_flares.copy(), ats_unique_flares.copy())
In [90]:
print 'ats matched / viirs observed:', np.sum(viirs_unique_flares_ats.within_arcmin) * 1.0/ viirs_unique_flares_ats.shape[0]* 100,'%', np.sum(viirs_unique_flares_ats.within_arcmin), viirs_unique_flares_ats.shape[0]
print 'sls matched / viirs observed:', np.sum(viirs_unique_flares_sls.within_arcmin) * 1.0/ viirs_unique_flares_sls.shape[0]* 100,'%', np.sum(viirs_unique_flares_sls.within_arcmin), viirs_unique_flares_sls.shape[0]
print 'viirs matched / ats observed:', np.sum(ats_unique_flares_viirs.within_arcmin) * 1.0/ ats_unique_flares_viirs.shape[0]* 100,'%', np.sum(ats_unique_flares_viirs.within_arcmin), ats_unique_flares_viirs.shape[0]
print 'sls matched / ats observed:', np.sum(ats_unique_flares_sls.within_arcmin) * 1.0/ ats_unique_flares_sls.shape[0]* 100,'%', np.sum(ats_unique_flares_sls.within_arcmin), ats_unique_flares_sls.shape[0]
print 'viirs matched / sls observed:', np.sum(sls_unique_flares_viirs.within_arcmin) * 1.0/ sls_unique_flares_viirs.shape[0]* 100,'%', np.sum(sls_unique_flares_viirs.within_arcmin), sls_unique_flares_viirs.shape[0]
print 'ats matched / sls observed:', np.sum(sls_unique_flares_ats.within_arcmin) * 1.0/ sls_unique_flares_ats.shape[0]* 100,'%', np.sum(sls_unique_flares_ats.within_arcmin), sls_unique_flares_ats.shape[0]
In [91]:
# get top countries according to VIIRS
viirs_top_20 = viirs_unique_flares.groupby('countries').countries.count().sort_values(ascending=False)[0:20]
top_20_countries = viirs_top_20.index.values
top_20_countries.sort()
print 'VIIRS top 20 covers:', viirs_top_20.sum() * 1.0 / viirs_unique_flares.shape[0] * 100, '% of flares'
In [92]:
viirs_unique_flares_ats_t20 = viirs_unique_flares_ats[viirs_unique_flares_ats['countries'].isin(top_20_countries)]
viirs_unique_flares_sls_t20 = viirs_unique_flares_sls[viirs_unique_flares_sls['countries'].isin(top_20_countries)]
sls_unique_flares_viirs_t20 = sls_unique_flares_viirs[sls_unique_flares_viirs['countries'].isin(top_20_countries)]
sls_unique_flares_ats_t20 = sls_unique_flares_ats[sls_unique_flares_ats['countries'].isin(top_20_countries)]
ats_unique_flares_viirs_t20 = ats_unique_flares_viirs[ats_unique_flares_viirs['countries'].isin(top_20_countries)]
ats_unique_flares_sls_t20 = ats_unique_flares_sls[ats_unique_flares_sls['countries'].isin(top_20_countries)]
In [93]:
viirs_ats_ratio = []
viirs_sls_ratio = []
sls_viirs_ratio = []
sls_ats_ratio = []
ats_viirs_ratio = []
ats_sls_ratio = []
for c in top_20_countries:
c_df = viirs_unique_flares_ats_t20[viirs_unique_flares_ats_t20.countries == c]
viirs_ats_ratio.append(c_df.within_arcmin.sum() * 1.0 / c_df.shape[0]* 100)
c_df = viirs_unique_flares_sls_t20[viirs_unique_flares_sls_t20.countries == c]
viirs_sls_ratio.append(c_df.within_arcmin.sum() * 1.0 / c_df.shape[0]* 100)
c_df = sls_unique_flares_viirs_t20[sls_unique_flares_viirs_t20.countries == c]
sls_viirs_ratio.append(c_df.within_arcmin.sum() * 1.0 / c_df.shape[0]* 100)
c_df = sls_unique_flares_ats_t20[sls_unique_flares_ats_t20.countries == c]
sls_ats_ratio.append(c_df.within_arcmin.sum() * 1.0 / c_df.shape[0] * 100)
c_df = ats_unique_flares_viirs_t20[ats_unique_flares_viirs_t20.countries == c]
ats_viirs_ratio.append(c_df.within_arcmin.sum() * 1.0 / c_df.shape[0]* 100)
c_df = ats_unique_flares_sls_t20[ats_unique_flares_sls_t20.countries == c]
ats_sls_ratio.append(c_df.within_arcmin.sum() * 1.0 / c_df.shape[0]* 100)
In [94]:
for i, c in enumerate(top_20_countries):
print c, viirs_ats_ratio[i], viirs_sls_ratio[i], sls_ats_ratio[i], sls_viirs_ratio[i], ats_sls_ratio[i], ats_viirs_ratio[i]
In [95]:
# do the plot
plt.close('all')
fig = plt.figure(figsize=(14, 7))
ax0 = plt.subplot(131)
ax1 = plt.subplot(132)
ax2 = plt.subplot(133)
ind = np.arange(0, 20)
ax0.barh(ind, viirs_sls_ratio, height=0.4, align='edge', color='gray', label = 'S/V')
ax0.barh(ind-0.4, viirs_ats_ratio, height=0.4, align='edge', color='r', label='A/V')
ax0.set_yticks(ind)
ax0.set_xlim([0,100])
ax0.set_ylim([-1,20.5])
ax0.plot([50, 50], [-10, 30], 'k--')
ax0.set_yticklabels(top_20_countries)
ax0.legend()
ax0.set(ylabel='Top 20 Countries', xlabel='Percent Matched')
ax0.yaxis.label.set_size(12)
ax0.xaxis.label.set_size(12)
ax1.barh(ind, sls_viirs_ratio, height=0.4, align='edge', color='gray', label='V/S')
ax1.barh(ind-0.4, sls_ats_ratio, height=0.4, align='edge', color='b', label='A/S')
ax1.set_yticks(ind)
ax1.set_xlim([0,100])
ax1.set_ylim([-1,20.5])
ax1.plot([50, 50], [-10, 30], 'k--')
ax1.set_yticklabels('')
ax1.legend()
ax1.set(xlabel='Percent Matched')
ax1.xaxis.label.set_size(12)
ax2.barh(ind, ats_viirs_ratio, height=0.4, align='edge', color='gray', label='V/A')
ax2.barh(ind-0.4, ats_sls_ratio, height=0.4, align='edge', color='y', label='S/A')
ax2.set_yticks(ind)
ax2.set_xlim([0,100])
ax2.set_ylim([-1,20.5])
ax2.plot([50, 50], [-10, 30], 'k--')
ax2.set_yticklabels('')
ax2.legend()
ax2.set(xlabel='Percent Matched')
ax2.xaxis.label.set_size(12)
ax0.text(0.85, 0.02, '(a)', transform=ax0.transAxes, fontsize=18)
ax1.text(0.85, 0.02, '(b)', transform=ax1.transAxes, fontsize=18)
ax2.text(0.85, 0.02, '(c)', transform=ax2.transAxes, fontsize=18)
plt.savefig(os.path.join(output_path, 'detection_bar_charts.png'), bbox_inches='tight', dpi=600)
plt.show()
From the above, ATSR sees a large fraction of the VIIRS and SLSTR flares. The problem area appears to be the United States. Why?
We have the December 2017 VIIRS data. Assume that over the United States in December there are few wildfires. We restrict to only those points that are >1400 K, to be consistent with the thresholds applied to VIIRS. The SLSTR data is screened by requiring at least two detections and a temperature threshold that places it between 1400 and 2800 K. This gives an assumed flare data subset over North America.
This can be visualised with two cumulative histograms and the individual statistics reported.
Ideas for the observed behaviour:
Flares could be operating more intermittently, so with our less frequent observations we are missing much of the flaring activity. This can be checked by processing all of the SLSTR data and evaluating off swath, or by processing all of the VIIRS data and evaluating only at the same view angles.
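A minimal sketch of the screening described above, assuming the nightfire_df and sls_hotspot_df dataframes and the Temp_BB / sample_counts_flare columns used elsewhere in this notebook (the SLSTR temperature screening itself is applied upstream and is not shown here):
In [ ]:
# sketch only: VIIRS Nightfire points above the 1400 K blackbody temperature threshold
viirs_assumed_flares = nightfire_df[nightfire_df.Temp_BB > 1400]
# sketch only: SLSTR grid cells with at least two detections over the period
sls_counts = sls_hotspot_df.groupby(['lats_arcmin', 'lons_arcmin'], as_index=False).agg({'sample_counts_flare': np.sum})
sls_repeat_cells = sls_counts[sls_counts.sample_counts_flare >= 2]
sls_assumed_flares = sls_hotspot_df.merge(sls_repeat_cells[['lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])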
In [96]:
# Get SLSTR first (using annum data)
sls_df_subset = sls_flare_df_annum[['frp', 'year', 'month', 'lats_arcmin', 'lons_arcmin']]
sls_df_subset = sls_df_subset.merge(sls_countries_df[['countries', 'lats_arcmin', 'lons_arcmin']],
on=['lats_arcmin', 'lons_arcmin'])
sls_df_subset = sls_df_subset[((sls_df_subset.countries == 'United States') |
(sls_df_subset.countries == 'Canada')) &
(sls_df_subset.year == 2017) &
(sls_df_subset.month == 12)
]
sls_df_grouped = sls_df_subset.groupby(['lats_arcmin', 'lons_arcmin'], as_index=False).agg({'frp': np.mean})
# get Nightfire DS and add VZA data
nightfire_df_subset = nightfire_df[['RH', 'Temp_BB', 'lats_arcmin', 'lons_arcmin', 'Sample_M10']]
nightfire_df_subset = nightfire_df_subset.merge(nightfire_countries_df[['countries', 'lats_arcmin', 'lons_arcmin']],
on=['lats_arcmin', 'lons_arcmin'])
vza = viirs_zenith_angles[nightfire_df_subset.Sample_M10.values.astype(int)-1].astype('float')
nightfire_df_subset['vza'] = vza
# all viirs flare data
nightfire_df_subset_all = nightfire_df_subset[(nightfire_df_subset.RH != 999999) &
(nightfire_df_subset.Temp_BB > 1600) &
((nightfire_df_subset.countries == 'United States') |
(nightfire_df_subset.countries == 'Canada'))]
nightfire_df_grouped_all = nightfire_df_subset_all.groupby(['lats_arcmin', 'lons_arcmin'],
as_index=False).agg({'RH': np.mean})
# all VIIRS in VZA
nightfire_df_subset_vza = nightfire_df_subset_all[nightfire_df_subset_all.vza < 22]
nightfire_df_grouped_vza = nightfire_df_subset_vza.groupby(['lats_arcmin', 'lons_arcmin'],
as_index=False).agg({'RH': np.mean})
# # all VIIRS > min SLS FRP value
nightfire_df_subset_min = nightfire_df_subset_vza[nightfire_df_subset_vza.RH > sls_df_grouped.frp.min()]
nightfire_df_grouped_min = nightfire_df_subset_min.groupby(['lats_arcmin', 'lons_arcmin'],
as_index=False).agg({'RH': np.mean})
#
plt.close('all')
fig = plt.figure(figsize=(7, 7))
ax0 = plt.subplot(111)
ax0.set_xlim([0,18])
ax0.set_xlabel('Binned FRP (MW)', fontsize=18)
ax0.set_ylabel('Frequency', fontsize=18)
ax0.hist(nightfire_df_grouped_all.RH, bins=50, histtype='step', cumulative=True, color='gray', label='All VIIRS')
ax0.hist(nightfire_df_grouped_vza.RH, bins=50, histtype='bar', cumulative=True, color='gray', label='VIIRS (VZA<22)')
ax0.hist(nightfire_df_grouped_min.RH, bins=50, histtype='bar', cumulative=True, color='r', label='VIIRS (>min(SLSTR))')
ax0.hist(sls_df_grouped.frp, bins=50, histtype='bar', cumulative=True, color='k', label='SLSTR (VZA<22)')
plt.legend(loc=1)
plt.savefig(os.path.join(output_path, 'NA_flare_counts_viirs_vs_slstr.png'), bbox_inches='tight', dpi=600)
plt.show()
In [788]:
nightfire_df_subset[nightfire_df_subset.RH < sls_df_grouped.frp.min()].RH.describe()
Out[788]:
In [97]:
c = ['United States', 'Canada', 'China']
m = ~viirs_unique_flares_ats['countries'].isin(c)
print 'ats matched / viirs observed:', np.sum(viirs_unique_flares_ats.loc[m].within_arcmin) * 1.0/ np.sum(m) * 100,'%', np.sum(viirs_unique_flares_ats.loc[m].within_arcmin), np.sum(m)
m = ~viirs_unique_flares_sls['countries'].isin(c)
print 'sls matched / viirs observed:', np.sum(viirs_unique_flares_sls.loc[m].within_arcmin) * 1.0/ np.sum(m)* 100,'%', np.sum(viirs_unique_flares_sls.loc[m].within_arcmin), np.sum(m)
m = ~ats_unique_flares_viirs['countries'].isin(c)
print 'viirs matched / ats observed:', np.sum(ats_unique_flares_viirs.loc[m].within_arcmin) * 1.0/ np.sum(m)* 100,'%', np.sum(ats_unique_flares_viirs.loc[m].within_arcmin), np.sum(m)
m = ~ats_unique_flares_sls['countries'].isin(c)
print 'sls matched / ats observed:', np.sum(ats_unique_flares_sls.loc[m].within_arcmin) * 1.0/ np.sum(m)* 100,'%', np.sum(ats_unique_flares_sls.loc[m].within_arcmin), np.sum(m)
m = ~sls_unique_flares_viirs['countries'].isin(c)
print 'viirs matched / sls observed:', np.sum(sls_unique_flares_viirs.loc[m].within_arcmin) * 1.0/ np.sum(m)* 100,'%', np.sum(sls_unique_flares_viirs.loc[m].within_arcmin), np.sum(m)
m = ~sls_unique_flares_ats['countries'].isin(c)
print 'ats matched / sls observed:', np.sum(sls_unique_flares_ats.loc[m].within_arcmin) * 1.0/ np.sum(m)* 100,'%', np.sum(sls_unique_flares_ats.loc[m].within_arcmin), np.sum(m)
In this analysis we do not use the cloud-adjusted FRP values, as we are not worrying about sampling differences; we just assume that the cloud characteristics are similar for each sensor (which may be wrong, as we are looking at different times of day) and take the mean of the observations. We could get all the night-time VIIRS observations to do this, but we think it is unnecessary. This uses data from March.
In [98]:
# make sure that the march SLSTR data is restricted to flares only
sls_0317_flares = sls_hotspot_df_0317.copy()
sls_flare_locations = sls_flare_sampling_df_annum[['lats_arcmin', 'lons_arcmin']].copy()
sls_flare_locations.drop_duplicates(inplace=True)
# restrict to flaring sites only
print sls_0317_flares.shape
sls_0317_flares = pd.merge(sls_0317_flares, sls_flare_locations, on=['lats_arcmin', 'lons_arcmin'])
print sls_0317_flares.shape
In [99]:
def round_to_nearest_01_degree(s):
return (s.round(1) * 10).astype(int)
In [544]:
viirs_persistent = viirs_iraq_hotspot_df.copy()
viirs_persistent['lats_01'] = round_to_nearest_01_degree(viirs_persistent.lats)
viirs_persistent['lons_01'] = round_to_nearest_01_degree(viirs_persistent.lons)
to_group = ['year', 'lats_01', 'lons_01']
to_agg = {'frp': np.mean,
'lats': np.mean,
'lons': np.mean}
viirs_persistent = viirs_persistent.groupby(to_group, as_index=False).agg(to_agg)
viirs_persistent['years_seen'] = 1
to_group = ['lats_01', 'lons_01']
to_agg = {'years_seen': np.sum}
viirs_persistent = viirs_persistent.groupby(to_group, as_index=False).agg(to_agg)
# find locations seen in both 2012 and 2017 data
viirs_persistent = viirs_persistent[viirs_persistent.years_seen == 2][['lats_01', 'lons_01']]
print 'total persistent viirs sites seen in 2012 and 2017:', viirs_persistent.shape[0]
In [545]:
# merge on the persistent sites to reduce the viirs data
viirs_sites = viirs_iraq_hotspot_df.copy()
viirs_sites['lats_01'] = round_to_nearest_01_degree(viirs_sites.lats)
viirs_sites['lons_01'] = round_to_nearest_01_degree(viirs_sites.lons)
viirs_sites = pd.merge(viirs_sites, viirs_persistent, on=['lats_01', 'lons_01'])
# load in comparison sites
ats_sites = ats_flare_df[(ats_flare_df.year == 2012) & (ats_flare_df.month == 3)].copy()
sls_sites = sls_hotspot_df_0317.copy()
ats_sites['lats_01'] = round_to_nearest_01_degree(ats_sites.lats)
ats_sites['lons_01'] = round_to_nearest_01_degree(ats_sites.lons)
sls_sites['lats_01'] = round_to_nearest_01_degree(sls_sites.lats)
sls_sites['lons_01'] = round_to_nearest_01_degree(sls_sites.lons)
In [546]:
# now group the clusters by time stamp and sum frp for each time stamp
to_group = ['year', 'month', 'day', 'sensor', 'lats_01', 'lons_01']
to_agg = {'lats': np.mean, 'lons':np.mean, 'frp': np.sum}
viirs_sites_grouped = viirs_sites.groupby(to_group, as_index=False).agg(to_agg)
ats_sites_grouped = ats_sites.groupby(to_group, as_index=False).agg(to_agg)
sls_sites_grouped = sls_sites.groupby(to_group, as_index=False).agg(to_agg)
# now get the typical behaviour for each cluster over the year and month
to_group = ['year', 'month', 'sensor', 'lats_01', 'lons_01']
to_agg = {'lats': np.mean, 'lons':np.mean, 'frp': np.mean, 'frp_std': np.std}
viirs_sites_grouped['frp_std'] = viirs_sites_grouped.frp.copy()
ats_sites_grouped['frp_std'] = ats_sites_grouped.frp.copy()
sls_sites_grouped['frp_std'] = sls_sites_grouped.frp.copy()
viirs_sites_means = viirs_sites_grouped.groupby(to_group, as_index=False).agg(to_agg)
ats_sites_means = ats_sites_grouped.groupby(to_group, as_index=False).agg(to_agg)
sls_sites_means = sls_sites_grouped.groupby(to_group, as_index=False).agg(to_agg)
In [547]:
# now merge on year and cluster
viirs_sites_means.rename(columns={'frp': 'frp_viirs', 'frp_std': 'frp_std_viirs'}, inplace=True)
ats_sites_means.rename(columns={'frp': 'frp_ats', 'frp_std': 'frp_std_ats'}, inplace=True)
sls_sites_means.rename(columns={'frp': 'frp_sls', 'frp_std': 'frp_std_sls'}, inplace=True)
on = ['year', 'lats_01', 'lons_01']
cols_ats = ['year', 'lats_01', 'lons_01', 'frp_ats', 'frp_std_ats']
cols_sls = ['year', 'lats_01', 'lons_01', 'frp_sls', 'frp_std_sls']
cols_viirs = ['year', 'lats_01', 'lons_01', 'frp_viirs', 'frp_std_viirs', 'lats', 'lons']
ats_vs_viirs = pd.merge(viirs_sites_means[cols_viirs], ats_sites_means[cols_ats], on=on)
sls_vs_viirs = pd.merge(viirs_sites_means[cols_viirs], sls_sites_means[cols_sls], on=on)
ats_vs_sls = pd.merge(ats_sites_means[cols_ats], sls_sites_means[cols_sls], on=['lats_01', 'lons_01'])
In [548]:
# mask
#ats_vs_viirs = ats_vs_viirs[(ats_vs_viirs.frp_ats < 150) & (ats_vs_viirs.frp_viirs < 150)]
#sls_vs_viirs = sls_vs_viirs[(sls_vs_viirs.frp_sls < 150) & (sls_vs_viirs.frp_viirs < 150)]
max_lat = 36
min_lat = 20
min_lon = 42
max_lon = 60
ats_vs_viirs = ats_vs_viirs[(ats_vs_viirs.lats < max_lat) &
(ats_vs_viirs.lats > min_lat) &
(ats_vs_viirs.lons > min_lon) &
(ats_vs_viirs.lons < max_lon)]
sls_vs_viirs = sls_vs_viirs[(sls_vs_viirs.lats < max_lat) &
(sls_vs_viirs.lats > min_lat) &
(sls_vs_viirs.lons > min_lon) &
(sls_vs_viirs.lons < max_lon)]
In [768]:
def rmse(predictions, targets):
return np.sqrt(((predictions - targets) ** 2).mean())
# plot a scale bar with 4 subdivisions on the left side of the map
def scale_bar_left(ax, bars=1, length=None, location=(0.1, 0.05), linewidth=3, col='black'):
"""
ax is the axes to draw the scalebar on.
bars is the number of subdivisions of the bar (black and white chunks)
length is the length of the scalebar in km.
location is left side of the scalebar in axis coordinates.
(ie. 0 is the left side of the plot)
linewidth is the thickness of the scalebar.
color is the color of the scale bar
"""
# Get the limits of the axis in lat long
llx0, llx1, lly0, lly1 = ax.get_extent(ccrs.PlateCarree())
# Make tmc aligned to the left of the map,
# vertically at scale bar location
sbllx = llx0 + (llx1 - llx0) * location[0]
sblly = lly0 + (lly1 - lly0) * location[1]
tmc = ccrs.TransverseMercator(sbllx, sblly)
# Get the extent of the plotted area in coordinates in metres
x0, x1, y0, y1 = ax.get_extent(tmc)
# Turn the specified scalebar location into coordinates in metres
sbx = x0 + (x1 - x0) * location[0]
sby = y0 + (y1 - y0) * location[1]
# Calculate a scale bar length if none has been given
# (There's probably a more pythonic way of rounding the number but this works)
if not length:
length = (x1 - x0) / 5000 # in km
ndim = int(np.floor(np.log10(length))) # number of digits in number
length = round(length, -ndim) # round to 1sf
# Reduce the length until its leading digit is 1, 2 or 5
def scale_number(x):
if str(x)[0] in ['1', '2', '5']:
return int(x)
else:
return scale_number(x - 10 ** ndim)
length = scale_number(length)
# Generate the x coordinate for the ends of the scalebar
bar_xs = [sbx, sbx + length * 1000 / bars]
# Plot the scalebar chunks
barcol = 'black'
for i in range(0, bars):
# plot the chunk
ax.plot(bar_xs, [sby, sby], transform=tmc, color=barcol, linewidth=linewidth)
# alternate the colour
if barcol == 'white':
barcol = 'dimgrey'
else:
barcol = 'white'
# Generate the x coordinate for the number
bar_xt = sbx + i * length * 1000 / bars
# Plot the scalebar label for that chunk
# ax.text(bar_xt, sby, str(int(i * length / bars)), transform=tmc,
# horizontalalignment='center', verticalalignment='bottom',
# color=col)
# work out the position of the next chunk of the bar
bar_xs[0] = bar_xs[1]
bar_xs[1] = bar_xs[1] + length * 1000 / bars
# Generate the x coordinate for the last number
bar_xt = sbx + length * 1000
# Plot the last scalebar label
ax.text(bar_xt/2., sby, str(int(length))+ ' km', transform=tmc,
horizontalalignment='center', verticalalignment='bottom',
color=col)
# Plot the unit label below the bar
bar_xt = sbx + length * 1000 / 2
bar_yt = y0 + (y1 - y0) * (location[1] / 4)
ax.text(bar_xt, bar_yt, 'km', transform=tmc, horizontalalignment='center',
verticalalignment='bottom', color=col)
plt.close('all')
fig = plt.figure(figsize=(18, 5))
ax0 = plt.subplot(131)
ax1 = plt.subplot(132)
ax2 = plt.subplot(133, projection=ccrs.PlateCarree())
h = ax0.hexbin(ats_vs_viirs.frp_viirs, ats_vs_viirs.frp_ats, gridsize=45, mincnt=1,
cmap='Blues', linewidths=0.5, edgecolors='k',extent=[-5, 200, -5, 200])
cbar = plt.colorbar(mappable=h, ax=ax0).set_label('Sample Count', fontsize=15)
h = ax1.hexbin(sls_vs_viirs.frp_viirs, sls_vs_viirs.frp_sls, gridsize=45, mincnt=1,
cmap='Reds', linewidths=0.5, edgecolors='k', extent=[-5, 200, -5, 200])
cbar = plt.colorbar(mappable=h, ax=ax1).set_label('Sample Count', fontsize=15)
# sns.regplot(ats_vs_viirs.frp_viirs, ats_vs_viirs.frp_ats, ci=95, color='k', ax=ax0, scatter=False)
# sns.regplot(sls_vs_viirs.frp_viirs, sls_vs_viirs.frp_sls, ci=95, color='k', ax=ax1, scatter=False)
ax0.text(0.90, 0.02, '(a)', transform=ax0.transAxes, fontsize=14)
ax1.text(0.90, 0.02, '(b)', transform=ax1.transAxes, fontsize=14)
ax2.text(0.93, 0.02, '(c)', transform=ax2.transAxes, fontsize=14)
x_lim = (-5,200)
y_lim = (-5,200)
ax0.plot(x_lim, y_lim, color='k', linestyle='-', linewidth=1)
ax1.plot(x_lim, y_lim, color='k', linestyle='-', linewidth=1)
# ax0.set_xlim(x_lim)
# ax1.set_xlim(x_lim)
# ax0.set_ylim(y_lim)
# ax1.set_ylim(y_lim)
ax0.set(ylabel='AATSR Mean Cluster FRP (MW)', xlabel='VIIRS Mean Cluster FRP (MW)')
ax0.yaxis.label.set_size(12)
ax0.xaxis.label.set_size(12)
ax1.set(ylabel='SLSTR Mean Cluster FRP (MW)', xlabel='VIIRS Mean Cluster FRP (MW)')
ax1.yaxis.label.set_size(12)
ax1.xaxis.label.set_size(12)
slope0, intercept0, r_value0, _, _ = scipy.stats.linregress(ats_vs_viirs.frp_viirs,
ats_vs_viirs.frp_ats)
slope1, intercept1, r_value1, _, _ = scipy.stats.linregress(sls_vs_viirs.frp_viirs,
sls_vs_viirs.frp_sls)
# zero intercept reg
x = sls_vs_viirs.frp_viirs.values[:,np.newaxis]
y = sls_vs_viirs.frp_sls.values
a, _, _, _ = np.linalg.lstsq(x, y)
ats_median = np.median(ats_vs_viirs.frp_ats - ats_vs_viirs.frp_viirs)
sls_median = np.median(sls_vs_viirs.frp_sls - sls_vs_viirs.frp_viirs)
print 'ats median:', ats_median
print 'sls median:', sls_median
ats_mean = np.mean(ats_vs_viirs.frp_ats - ats_vs_viirs.frp_viirs)
sls_mean = np.mean(sls_vs_viirs.frp_sls - sls_vs_viirs.frp_viirs)
ats_sd = np.std(ats_vs_viirs.frp_ats - ats_vs_viirs.frp_viirs)
sls_sd = np.std(sls_vs_viirs.frp_sls - sls_vs_viirs.frp_viirs)
# # adjust sls data
adjusted_sls = sls_vs_viirs.frp_sls.values*1.11
print
#print 'sls scaling factor: ', 1/a
print 'adjusted sls median:', np.median(adjusted_sls - sls_vs_viirs.frp_viirs )
print 'adjusted sls mean:', np.mean(adjusted_sls - sls_vs_viirs.frp_viirs )
print 'adjusted sls sd:', np.std(adjusted_sls - sls_vs_viirs.frp_viirs)
textstr0 = '$R^2=%.3f$\n Bias=%.2f\n $\sigma$=%.2f\n Samples$=%.0f$' % (r_value0**2, ats_mean, ats_sd, ats_vs_viirs.frp_viirs.shape[0])
textstr1 = '$R^2=%.3f$\n Bias=%.2f\n $\sigma$=%.2f\n Samples$=%.0f$' % (r_value1**2, sls_mean, sls_sd, sls_vs_viirs.frp_viirs.shape[0])
props = dict(boxstyle='round', facecolor='white', alpha=0.5)
ax0.text(0.05, 0.95, textstr0, transform=ax0.transAxes, fontsize=14,
verticalalignment='top', bbox=props)
ax1.text(0.05, 0.95, textstr1, transform=ax1.transAxes, fontsize=14,
verticalalignment='top', bbox=props)
# do sampling map
# xlims = [(-180, 180), (-105, -87), (4, 9), (46, 56), (65, 82), (106, 125)]
# ylims = [(-90, 90), (25, 39), (3, 7), (23, 33), (55, 68), (33, 45)]
# pos = [(-102, 40), (-2, -1), (39, 26), (83, 62), (113, 47)]
ax2.set_xlim((min_lon, max_lon))
ax2.set_ylim((min_lat, max_lat))
ax2.coastlines()
gl = ax2.gridlines(crs=ccrs.PlateCarree(), draw_labels=True,
linewidth=1, color='gray', alpha=0.5, linestyle='--')
ax2.plot(sls_vs_viirs.lons, sls_vs_viirs.lats, 'rx', alpha=0.5, markersize=10, label='SLSTR & VIIRS')
ax2.plot(ats_vs_viirs.lons, ats_vs_viirs.lats, 'b.', alpha=0.2, markersize=10, label='AATSR & VIIRS')
ax2.arrow(58, 30, 0, 2, head_width=0.5, transform=ccrs.PlateCarree())
ax2.text(57.65, 33, 'N', transform=ccrs.PlateCarree(), fontsize=14)
ax2.legend(prop={'size': 15})
scale_bar_left(ax2, bars=1, length=250, location=(0.05, 0.25), linewidth=3)
plt.savefig(os.path.join(output_path, 'viirs_vs_ats_iraq.png'), bbox_inches='tight', dpi=600)
plt.show()
In [106]:
# merge on the persistent sites to reduce the viirs data
viirs_got_sites = viirs_got_hotspot_df.copy()
viirs_got_sites['lats_01'] = round_to_nearest_01_degree(viirs_got_sites.lats)
viirs_got_sites['lons_01'] = round_to_nearest_01_degree(viirs_got_sites.lons)
mask_got = ((viirs_got_sites.lats >= 5) & (viirs_got_sites.lats <= 11) &
(viirs_got_sites.lons >= 100) & (viirs_got_sites.lons <= 108))
viirs_got_sites = viirs_got_sites[mask_got]
viirs_libya_sites = viirs_libya_hotspot_df.copy()
viirs_libya_sites['lats_01'] = round_to_nearest_01_degree(viirs_libya_sites.lats)
viirs_libya_sites['lons_01'] = round_to_nearest_01_degree(viirs_libya_sites.lons)
mask_libya = ((viirs_libya_sites.lats >= 27) & (viirs_libya_sites.lats <= 34) &
(viirs_libya_sites.lons >= 2) & (viirs_libya_sites.lons <= 12))
viirs_libya_sites = viirs_libya_sites[mask_libya]
viirs_oman_sites = viirs_oman_hotspot_df.copy()
viirs_oman_sites['lats_01'] = round_to_nearest_01_degree(viirs_oman_sites.lats)
viirs_oman_sites['lons_01'] = round_to_nearest_01_degree(viirs_oman_sites.lons)
mask_oman = ((viirs_oman_sites.lats >= 16) & (viirs_oman_sites.lats <= 22) &
(viirs_oman_sites.lons >= 51) & (viirs_oman_sites.lons <= 60))
viirs_oman_sites = viirs_oman_sites[mask_oman]
# load in comparison sls sites
sls_sites = sls_hotspot_df_0317.copy()
sls_sites['lats_01'] = round_to_nearest_01_degree(sls_sites.lats)
sls_sites['lons_01'] = round_to_nearest_01_degree(sls_sites.lons)
In [107]:
# now group the clusters by time stamp and sum frp for each time stamp
to_group = ['year', 'month', 'day', 'sensor', 'lats_01', 'lons_01']
to_agg = {'lats': np.mean, 'lons':np.mean, 'frp': np.sum}
viirs_got_grouped = viirs_got_sites.groupby(to_group, as_index=False).agg(to_agg)
viirs_libya_grouped = viirs_libya_sites.groupby(to_group, as_index=False).agg(to_agg)
viirs_oman_grouped = viirs_oman_sites.groupby(to_group, as_index=False).agg(to_agg)
sls_sites_grouped = sls_sites.groupby(to_group, as_index=False).agg(to_agg)
# now get the typical behaviour for each cluster over the year and month
to_group = ['year', 'month', 'sensor', 'lats_01', 'lons_01']
to_agg = {'lats': np.mean, 'lons':np.mean, 'frp': np.mean}
viirs_got_means = viirs_got_grouped.groupby(to_group, as_index=False).agg(to_agg)
viirs_libya_means = viirs_libya_grouped.groupby(to_group, as_index=False).agg(to_agg)
viirs_oman_means = viirs_oman_grouped.groupby(to_group, as_index=False).agg(to_agg)
sls_sites_means = sls_sites_grouped.groupby(to_group, as_index=False).agg(to_agg)
In [108]:
# now merge on year and cluster
viirs_got_means.rename(columns={'frp': 'frp_viirs', }, inplace=True)
viirs_libya_means.rename(columns={'frp': 'frp_viirs'}, inplace=True)
viirs_oman_means.rename(columns={'frp': 'frp_viirs'}, inplace=True)
sls_sites_means.rename(columns={'frp': 'frp_sls', 'frp_std': 'frp_std_sls'}, inplace=True)
on = ['year', 'lats_01', 'lons_01']
cols_sls = ['year', 'lats_01', 'lons_01', 'frp_sls']
cols_viirs = ['year', 'lats_01', 'lons_01', 'frp_viirs', 'lats', 'lons']
sls_vs_viirs_got = pd.merge(viirs_got_means[cols_viirs], sls_sites_means[cols_sls], on=on)
sls_vs_viirs_libya = pd.merge(viirs_libya_means[cols_viirs], sls_sites_means[cols_sls], on=on)
sls_vs_viirs_oman = pd.merge(viirs_oman_means[cols_viirs], sls_sites_means[cols_sls], on=on)
In [109]:
def rmse(predictions, targets):
return np.sqrt(((predictions - targets) ** 2).mean())
plt.close('all')
fig = plt.figure(figsize=(18, 5))
ax0 = plt.subplot(131)
ax1 = plt.subplot(132)
ax2 = plt.subplot(133)
h = ax0.hexbin(sls_vs_viirs_got.frp_viirs, sls_vs_viirs_got.frp_sls, gridsize=100, mincnt=1,
cmap='Blues', linewidths=0.5, edgecolors='k',extent=[-5, 50, -5, 50])
cbar = plt.colorbar(mappable=h, ax=ax0).set_label('Sample Count', fontsize=15)
h = ax1.hexbin(sls_vs_viirs_libya.frp_viirs, sls_vs_viirs_libya.frp_sls, gridsize=45, mincnt=1,
cmap='Reds', linewidths=0.5, edgecolors='k', extent=[-5, 50, -5, 50])
cbar = plt.colorbar(mappable=h, ax=ax1).set_label('Sample Count', fontsize=15)
h = ax2.hexbin(sls_vs_viirs_oman.frp_viirs, sls_vs_viirs_oman.frp_sls, gridsize=45, mincnt=1,
cmap='Greys', linewidths=0.5, edgecolors='k', extent=[-5, 50, -5, 50])
cbar = plt.colorbar(mappable=h, ax=ax2).set_label('Sample Count', fontsize=15)
# sns.regplot(ats_vs_viirs.frp_viirs, ats_vs_viirs.frp_ats, ci=95, color='k', ax=ax0, scatter=False)
# sns.regplot(sls_vs_viirs.frp_viirs, sls_vs_viirs.frp_sls, ci=95, color='k', ax=ax1, scatter=False)
ax0.text(0.90, 0.02, '(a)', transform=ax0.transAxes, fontsize=14)
ax1.text(0.90, 0.02, '(b)', transform=ax1.transAxes, fontsize=14)
ax2.text(0.90, 0.02, '(c)', transform=ax2.transAxes, fontsize=14)
# set limits
ax0.plot((0,200), (0,200), color='k', linestyle='-', linewidth=1)
ax1.plot((0,200), (0,200), color='k', linestyle='-', linewidth=1)
ax2.plot((0,200), (0,200), color='k', linestyle='-', linewidth=1)
ax0.set_xlim((0,20))
ax1.set_xlim((0,50))
ax2.set_xlim((0,30))
ax0.set_ylim((0,20))
ax1.set_ylim((0,50))
ax2.set_ylim((0,30))
ax0.set(ylabel='SLS Mean Cluster FRP(MW)', xlabel='VIIRS Mean Cluster FRP (MW)')
ax0.yaxis.label.set_size(12)
ax0.xaxis.label.set_size(12)
ax1.set(xlabel='VIIRS Mean Cluster FRP (MW)')
ax1.xaxis.label.set_size(12)
ax2.set(xlabel='VIIRS Mean Cluster FRP (MW)')
ax2.xaxis.label.set_size(12)
slope0, intercept0, r_value0, _, _ = scipy.stats.linregress(sls_vs_viirs_got.frp_viirs,
sls_vs_viirs_got.frp_sls)
slope1, intercept1, r_value1, _, _ = scipy.stats.linregress(sls_vs_viirs_libya.frp_viirs,
sls_vs_viirs_libya.frp_sls)
slope2, intercept2, r_value2, _, _ = scipy.stats.linregress(sls_vs_viirs_oman.frp_viirs,
sls_vs_viirs_oman.frp_sls)
# zero intercept reg
# x = sls_vs_viirs.frp_viirs.values[:,np.newaxis]
# y = sls_vs_viirs.frp_sls.values
# a, _, _, _ = np.linalg.lstsq(x, y)
got_median = np.median(sls_vs_viirs_got.frp_viirs - sls_vs_viirs_got.frp_sls)
libya_median = np.median(sls_vs_viirs_libya.frp_viirs - sls_vs_viirs_libya.frp_sls)
oman_median = np.median(sls_vs_viirs_oman.frp_viirs - sls_vs_viirs_oman.frp_sls)
print 'got median:', got_median
print 'libya median:', libya_median
print 'oman median:', oman_median
got_mean = np.mean(sls_vs_viirs_got.frp_viirs - sls_vs_viirs_got.frp_sls)
libya_mean = np.mean(sls_vs_viirs_libya.frp_viirs - sls_vs_viirs_libya.frp_sls)
oman_mean = np.mean(sls_vs_viirs_oman.frp_viirs - sls_vs_viirs_oman.frp_sls)
got_sd = np.std(sls_vs_viirs_got.frp_viirs - sls_vs_viirs_got.frp_sls)
libya_sd = np.std(sls_vs_viirs_libya.frp_viirs - sls_vs_viirs_libya.frp_sls)
oman_sd = np.std(sls_vs_viirs_oman.frp_viirs - sls_vs_viirs_oman.frp_sls)
textstr0 = '$R^2=%.3f$\n Bias=%.2f\n $\sigma$=%.2f\n Samples$=%.0f$' % (r_value0**2, got_mean, got_sd, sls_vs_viirs_got.frp_viirs.shape[0])
textstr1 = '$R^2=%.3f$\n Bias=%.2f\n $\sigma$=%.2f\n Samples$=%.0f$' % (r_value1**2, libya_mean, libya_sd, sls_vs_viirs_libya.frp_viirs.shape[0])
textstr2 = '$R^2=%.3f$\n Bias=%.2f\n $\sigma$=%.2f\n Samples$=%.0f$' % (r_value2**2, oman_mean, oman_sd, sls_vs_viirs_oman.frp_viirs.shape[0])
props = dict(boxstyle='round', facecolor='white', alpha=0.5)
ax0.text(0.05, 0.95, textstr0, transform=ax0.transAxes, fontsize=14,
verticalalignment='top', bbox=props)
ax1.text(0.05, 0.95, textstr1, transform=ax1.transAxes, fontsize=14,
verticalalignment='top', bbox=props)
ax2.text(0.05, 0.95, textstr2, transform=ax2.transAxes, fontsize=14,
verticalalignment='top', bbox=props)
#plt.savefig(os.path.join(output_path, 'viirs_vs_ats_iraq.png'), bbox_inches='tight', dpi=600)
plt.show()
In [110]:
ats_grouped_flares = ats_flare_df.groupby(['lats_arcmin', 'lons_arcmin'], as_index=False).agg({'frp': np.mean})
print (ats_grouped_flares.frp < 1).sum() * 1.0/ ats_grouped_flares.frp.shape[0] * 100
sls_grouped_flares = sls_flare_df.groupby(['lats_arcmin', 'lons_arcmin'], as_index=False).agg({'frp': np.mean})
print (sls_grouped_flares.frp < 1).sum() * 1.0/ sls_grouped_flares.frp.shape[0] * 100
In [111]:
# For RAL to determine small persistent flaring sites
sls_flare_positions = sls_flare_df.groupby(['lats_arcmin', 'lons_arcmin'], as_index=False).agg({'frp': np.mean, 'sample_counts_flare':np.sum, 'lats':np.mean, 'lons':np.mean})
sls_flare_positions_subset = sls_flare_positions[(sls_flare_positions.frp < 1.5) & (sls_flare_positions.sample_counts_flare > 20)]
fig = plt.figure(figsize=(12, 5))
ax = plt.subplot(111, projection=ccrs.PlateCarree())
ax.coastlines()
gl = ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True,
linewidth=1, color='gray', alpha=0.5, linestyle='--')
ax.plot(sls_flare_positions_subset.lons, sls_flare_positions_subset.lats, 'r.', alpha=1, markersize=4, label='> 20 obs & < 1.5 MW')
ax.legend(prop={'size': 15})
plt.savefig('slstr_persistent_flares_for_ral.png', bbox_inches='tight', dpi=600)
sls_flare_positions_subset.to_csv('persistent_slstr_flare_locations_lt1_5mw_gt20obs.csv')
First restrict the SLSTR data to a single annum. Currently we have contiguous data from June 2017 through May 2018, so let's set this as the annum that we use. We think this is justifiable as it is the most up-to-date dataset and we are using it only as a demonstration; if not, we can obtain and process all of the data, requesting it from CEMS.
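A minimal sketch of how such an annum subset could be built, assuming the year and month columns used elsewhere in this notebook (the cell that actually creates sls_flare_df_annum appears earlier in the notebook and is not repeated here):
In [ ]:
# sketch only: restrict the SLSTR flare and sampling dataframes to the June 2017 - May 2018 annum
annum_mask = ((sls_flare_df.year == 2017) & (sls_flare_df.month >= 6)) | ((sls_flare_df.year == 2018) & (sls_flare_df.month <= 5))
sls_flare_df_annum_sketch = sls_flare_df[annum_mask].copy()
sampling_mask = ((sls_flare_sampling_df.year == 2017) & (sls_flare_sampling_df.month >= 6)) | ((sls_flare_sampling_df.year == 2018) & (sls_flare_sampling_df.month <= 5))
sls_flare_sampling_df_annum_sketch = sls_flare_sampling_df[sampling_mask].copy()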
In [394]:
# adjust SLSTR year
sls_flare_df_annum.loc[:, 'year'] = 2018
sls_flare_sampling_df_annum.loc[:, 'year'] = 2018
In [395]:
def counts_by_country(ats_flare_df, sls_flare_df, ats_countries_df, sls_countries_df):
flare_df = pd.concat([ats_flare_df, sls_flare_df])
countries_df = pd.concat([ats_countries_df, sls_countries_df])
# rename years where we have data from both sensors (1996 AT1 & AT2; 2002 AT2 & ATS )
flare_df.loc[flare_df.year == 1996, 'sensor'] = 'AT1 & AT2'
flare_df.loc[flare_df.year == 2002, 'sensor'] = 'AT2 & ATS'
# get unique countries
countries_df.drop_duplicates(['countries', 'lats_arcmin', 'lons_arcmin'], inplace=True)
# get unique annual flares
annual_unique_flares = flare_df.drop_duplicates(['lats_arcmin', 'lons_arcmin', 'year'])[['lats_arcmin', 'lons_arcmin', 'year']]
annual_unique_flares_by_sensor = flare_df.drop_duplicates(['lats_arcmin', 'lons_arcmin', 'year', 'sensor'])[['lats_arcmin', 'lons_arcmin', 'year', 'sensor']]
# merge countries
annual_unique_flares = annual_unique_flares.merge(countries_df[['countries', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])
annual_unique_flares_by_sensor = annual_unique_flares_by_sensor.merge(countries_df[['countries', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])
# add active flares and flare locations columns
annual_unique_flares['active_flare_locations'] = 1
annual_unique_flares_by_sensor['active_flare_locations'] = 1
# now group
grouped_annual_unique_flares = annual_unique_flares.groupby(['year', 'countries'], as_index=False).agg(
{'active_flare_locations': np.sum})
grouped_annual_unique_flares_by_sensor = annual_unique_flares_by_sensor.groupby(['year', 'countries', 'sensor'], as_index=False).agg(
{'active_flare_locations': np.sum})
# divide by 1000 (to get counts in 000s)
grouped_annual_unique_flares['active_flare_locations'] = grouped_annual_unique_flares.active_flare_locations /1000
grouped_annual_unique_flares_by_sensor['active_flare_locations'] = grouped_annual_unique_flares_by_sensor.active_flare_locations /1000
return grouped_annual_unique_flares, grouped_annual_unique_flares_by_sensor
In [396]:
annual_flare_activity_df, annual_flare_activity_by_sensor_df = counts_by_country(ats_flare_df,
sls_flare_df_annum,
ats_countries_df,
sls_countries_df)
In [397]:
#for yr in [1991, 2012]:
# annual_flare_activity_by_sensor_df = annual_flare_activity_by_sensor_df[annual_flare_activity_by_sensor_df.year != yr]
# get the global annual FRP values
global_activity = annual_flare_activity_by_sensor_df.groupby(['sensor', 'year'], as_index=False).agg({'active_flare_locations': np.sum})
global_activity = global_activity.pivot('year', 'sensor', 'active_flare_locations')
global_activity.columns.name = None
global_activity.rename(columns={'at1': 'AT1', 'at2':'AT2', 'ats':'ATS', 'sls':'SLS'}, inplace=True)
global_activity = global_activity[['AT1', 'AT1 & AT2', 'AT2', 'AT2 & ATS', 'ATS', 'SLS']]
to_group = ['countries', 'year']
to_agg = {'active_flare_locations': np.sum}
screened_countries_activity_df = annual_flare_activity_by_sensor_df.groupby(to_group, as_index=False).agg(to_agg)
# # find the top n flaring countries
n_countries = 30
top_flaring_countries = screened_countries_activity_df.groupby('countries').agg({'active_flare_locations': np.sum}).sort_values('active_flare_locations', ascending=False).head(n_countries).reset_index()
# # reduce to the top countries
top_country_activity = screened_countries_activity_df.merge(top_flaring_countries[['countries']], on=['countries'])
# pivoted
pivoted_df = top_country_activity.pivot('year', 'countries', 'active_flare_locations')
df_list = []
for c in top_flaring_countries.countries.values:
df_list.append(pivoted_df[c])
top_country_activity = pd.concat(df_list, axis=1)
In [398]:
pc_plot_counts = pd.DataFrame(top_country_activity.values / annual_flare_activity_by_sensor_df.groupby(['year'], as_index=True).agg({'active_flare_locations': np.sum}).values * 100,
columns=top_country_activity.T.index,
index=top_country_activity.index)
In [399]:
# sort out diff plot showing only years with reasonable sampling
diff_plot = pc_plot_counts.copy(deep=True)
diff_plot.drop([1991,1992,2012], axis=0, inplace=True)
diff_plot = diff_plot - diff_plot.shift()
diff_plot.loc[1991] = np.nan
diff_plot.loc[1992] = np.nan
diff_plot.loc[2012] = np.nan
diff_plot.sort_index(inplace=True)
In [400]:
plt.close('all')
#fig, ax = plt.figure(figsize=(18,7))
fig, (ax0, ax1, ax2) = plt.subplots(nrows=3, ncols=1,
gridspec_kw={'height_ratios': [2, 7.5, 7.5]},
figsize=(15, 22))
sns.heatmap(global_activity.T, linewidths=1,
robust=True, annot=True,
annot_kws={'fontsize':13},
fmt='.1f',
cbar=False,
cmap='Greys_r',
vmax=18,
ax=ax0)
ax0.set(xlabel='', ylabel="a) Global Count ($x10^3$)")
ax0.yaxis.label.set_size(20)
ax0.set_xticklabels(ax0.get_xticklabels(), fontsize=12)
ax0.set_yticklabels(ax0.get_yticklabels(), fontsize=12, rotation=0)
sns.heatmap(pc_plot_counts.T, linewidths=1,
robust=True, annot=True,
annot_kws={'fontsize':13},
fmt='.1f',
cbar=False,
cmap='viridis',
vmax = 22,
ax=ax1)
ax1.set(xlabel='', ylabel='b) Fraction of Global Count (%)')
ax1.yaxis.label.set_size(20)
ax1.set_yticklabels(ax1.get_yticklabels(), fontsize=12)
ax1.set_xticklabels(ax1.get_xticklabels(), fontsize=12)
sns.heatmap(diff_plot.T, linewidths=1,
robust=True, annot=True,
annot_kws={'fontsize':13},
fmt='.1f',
cbar=False,
cmap='coolwarm',
vmax=3, vmin=-3,
ax=ax2)
ax2.set(xlabel='Year', ylabel='c) Annual Change in Fraction of Global Count (%)')
ax2.yaxis.label.set_size(20)
ax2.xaxis.label.set_size(20)
ax2.set_xticklabels(ax1.get_xticklabels(), fontsize=12)
ax2.set_yticklabels(ax1.get_yticklabels(), fontsize=12)
plt.savefig(os.path.join(output_path, 'flare_counts_analysis.png'), bbox_inches='tight', dpi=600)
plt.show()
Figure Comments: The global figure shows counts for all sensors (excluding 2016 data for SLS, as there are no flare observations). The country-level analysis uses only the valid non-overlapping years (which are defined in the code), so AT2 1995, AT1 1996, ATS 2002 and AT2 2003 are not used. This is necessary because the geospatial alignment is not exact and we cannot be certain of the total counts if we combine sensors (i.e. we may double count some flares).
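A minimal sketch of the year/sensor screening described above, assuming the ats_flare_df and sls_flare_df_annum dataframes used in the counting function; the actual screening is applied where those dataframes are built:
In [ ]:
# sketch only: drop the overlapping sensor/year combinations listed above before any country-level counting
combined_df = pd.concat([ats_flare_df, sls_flare_df_annum])
overlap = (((combined_df.sensor == 'at2') & (combined_df.year == 1995)) |
((combined_df.sensor == 'at1') & (combined_df.year == 1996)) |
((combined_df.sensor == 'ats') & (combined_df.year == 2002)) |
((combined_df.sensor == 'at2') & (combined_df.year == 2003)))
screened_combined_df = combined_df[~overlap]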
In [401]:
activity_global_pc_change = annual_flare_activity_by_sensor_df.groupby(['sensor', 'year'], as_index=False).agg({'active_flare_locations': np.sum})
activity_global_pc_change = activity_global_pc_change.sort_values('year')
activity_global_pc_change['pc_change'] = (activity_global_pc_change.active_flare_locations / activity_global_pc_change.active_flare_locations.shift() - 1)
activity_global_pc_change.drop('active_flare_locations', axis=1, inplace=True)
activity_global_pc_change = activity_global_pc_change.pivot('year', 'sensor', 'pc_change')
activity_global_pc_change.rename(columns={'at1': 'AT1', 'at2':'AT2', 'ats':'ATS', 'sls':'SLS'}, inplace=True)
activity_global_pc_change = activity_global_pc_change[['AT1', 'AT1 & AT2', 'AT2', 'AT2 & ATS', 'ATS', 'SLS']]
activity_country_pc_change = top_country_activity.copy()
activity_country_pc_change = (activity_country_pc_change / activity_country_pc_change.shift() - 1)
for yr in [1992]:
activity_global_pc_change = activity_global_pc_change[activity_global_pc_change.index != yr]
activity_country_pc_change = activity_country_pc_change[activity_country_pc_change.index != yr]
In [402]:
plt.close('all')
#fig, ax = plt.figure(figsize=(18,7))
fig, (ax0, ax1) = plt.subplots(nrows=2, ncols=1,
gridspec_kw={'height_ratios': [2, 7.5]},
figsize=(15, 12))
sns.heatmap(activity_global_pc_change.T*100, linewidths=1,
robust=True, annot=True,
fmt='.0f', annot_kws={'fontsize':13},
cbar=False,
cmap='coolwarm',
vmax=10, vmin=-10,
ax=ax0)
ax0.set(xlabel='', ylabel='Global')
ax0.yaxis.label.set_size(20)
ax0.set_xticklabels(ax0.get_xticklabels(), fontsize=12)
ax0.set_yticklabels(ax0.get_yticklabels(), fontsize=12, rotation=0)
sns.heatmap(activity_country_pc_change.T*100, linewidths=1,
robust=True, annot=True,
fmt='.0f', annot_kws={'fontsize':13},
cbar=False,
cmap='coolwarm',
vmax=50, vmin=-50,
ax=ax1)
ax1.set(xlabel='Year', ylabel='Country')
ax1.yaxis.label.set_size(20)
ax1.xaxis.label.set_size(20)
ax1.set_xticklabels(ax1.get_xticklabels(), fontsize=12)
ax1.set_yticklabels(ax1.get_yticklabels(), fontsize=12)
plt.savefig(os.path.join(output_path, 'ats_counts_change.png'), bbox_inches='tight', dpi=600)
plt.show()
First group each arcminute grid cell for each year in the flare dataframe, aggregating the fire radiative power and the total number of observations in that year by summing. Do the same for the sampling dataframe (i.e. the number of overpasses of a given flare location).
The cloud-adjusted mean is computed as the sum of the hotspot's FRP for the year divided by the total number of overpasses of the hotspot adjusted for cloud cover, e.g. 6000 [MW] / (120 [Obs.] * (1 - 0.6 [CC])). So we in effect get the mean FRP based on an assumed number of cloud-free observations (this also normalises for variation in sampling). The cloud cover I estimate from an 8 by 8 window around each hotspot, from which I take the ATSR cloud flag. This is then averaged over the year, so I get an annual mean cloud cover estimate for each hotspot.
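As a worked version of the example quoted above (numbers taken directly from the text):
In [ ]:
# worked example of the cloud-adjusted mean FRP using the numbers quoted above
annual_summed_frp = 6000.0 # MW, summed FRP over all detections in the year
n_overpasses = 120 # total overpasses of the grid cell in the year
mean_cloud_cover = 0.6 # annual mean cloud cover for the grid cell
cloud_adjusted_samples = n_overpasses * (1 - mean_cloud_cover) # 48 expected cloud-free overpasses
cloud_adjusted_mean_frp = annual_summed_frp / cloud_adjusted_samples # 125 MW
print cloud_adjusted_mean_frp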
What happens if the "hotspot" is observed to not be there? I.e. a location previously seen to be a flare is cloud free, but now there is nothing there (the flare is turned off for a while; maybe it is re-detected a few weeks or months later). Do these "zero" FRP values contribute to the mean FRP or not? It seems not, from your calculation. If they do not, then multiplying the cloud-adjusted mean FRP by the number of seconds in a year will not, I think, give the correct FRE, because you are counting the flare as burning at its mean FRP value even when it has been observed as not burning. When there has been a cloud-free observation of the flare's location and it is confirmed that there is nothing there, that time period should not contribute to the FRE calculation, or the mean FRP should include the zero FRP observed for that observation.
The number of observations includes not only active flaring, but all overpasses of the flaring location during the year. So in the above example the 6000 MW could come from 50 of the 120 overpasses, but all 120 (including the 70 'zero' observations) are used for the mean (and this number is further adjusted for cloud cover). So the calculation does take flare persistence into account.
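A worked version of this example (numbers purely illustrative) mirrors the calculation in the function below, including the clamp that stops the cloud-adjusted count falling below the number of actual detections:
In [ ]:
# illustrative numbers only: 50 active detections totalling 6000 MW, 120 overpasses, 60% mean cloud cover
annual_frp_sum = 6000.0      # MW, summed over the 50 active flare detections
n_detections = 50            # overpasses where the flare was actually detected
n_overpasses = 120           # all overpasses of the flare location in the year
mean_cloud_cover = 0.6
expected_clear_obs = n_overpasses * (1 - mean_cloud_cover)   # 48 expected cloud-free views
expected_clear_obs = max(expected_clear_obs, n_detections)   # clamped to 50, as in frp_by_country()
cloud_adjusted_mean_frp = annual_frp_sum / expected_clear_obs  # 120 MW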
Two different dataframes are created: one with sensor information and one without. The one without sensor information effectively creates a mean value for any overlapping years; this is not ideal, but is still useful for some analyses. The one with sensor information allows screening of year/sensor combinations so that no overlapping information contaminates the results.
In [403]:
def frp_by_country(flare_df, sampling_df, countries_df):
# get rid of the corrupted ATS orbit (see Malaysia/Indonesia analysis below for details)
bad_orbit_flare = (flare_df.year == 2011) & (flare_df.month == 10) & (flare_df.day == 10) & (flare_df.hhmm == 1240)
bad_orbit_sampling = (sampling_df.year == 2011) & (sampling_df.month == 10) & (sampling_df.day == 10) & (sampling_df.hhmm == 1240)
# apply bad orbit mask
print flare_df.shape
flare_df = flare_df.loc[~bad_orbit_flare, :]
print flare_df.shape
sampling_df = sampling_df.loc[~bad_orbit_sampling, :]
# compute the annual mean for each flare
to_group = ['lats_arcmin', 'lons_arcmin', 'year']
to_agg = {'frp': np.sum,
'sample_counts_flare': np.sum,
'lats': np.mean,
'lons': np.mean}
annual_summed_flare_df = flare_df.groupby(to_group, as_index=False).agg(to_agg)
# get the cloud cover adjusted sampling
to_group = ['lats_arcmin', 'lons_arcmin', 'year']
to_agg = {'cloud_cover': np.mean,
'sample_counts_all': np.sum}
annual_sample_df = sampling_df.groupby(to_group, as_index=False).agg(to_agg)
print annual_sample_df.head()
# merge and get cloud-adjusted sampling (i.e. the expected number of observations)
annual_frp_df = pd.merge(annual_summed_flare_df, annual_sample_df, on=to_group)
annual_frp_df['cloud_adjusted_sample_counts'] = annual_frp_df.sample_counts_all * (1-annual_frp_df.cloud_cover)
# if expected less than actual, then set to actual number of observations
mask = annual_frp_df['cloud_adjusted_sample_counts'] < annual_frp_df['sample_counts_flare']
annual_frp_df.loc[mask, 'cloud_adjusted_sample_counts'] = annual_frp_df.loc[mask, 'sample_counts_flare']
# compute mean frp
annual_frp_df['cloud_adjusted_mean_frp'] = annual_frp_df.frp / annual_frp_df.cloud_adjusted_sample_counts
# merge countries
annual_frp_df = annual_frp_df.merge(countries_df[['countries', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])
# sum on a per country basis
grouped_annual_frp = annual_frp_df.groupby(['year', 'countries'], as_index=False).agg({'cloud_adjusted_mean_frp': np.sum})
# ---------------------------------------
# Same again but with sensor information
# ---------------------------------------
# rename years where we have data from both sensors (1996 AT1 & AT2; 2002 AT2 & ATS)
flare_df.loc[flare_df.year == 1996, 'sensor'] = 'AT1 & AT2'
flare_df.loc[flare_df.year == 2002, 'sensor'] = 'AT2 & ATS'
sampling_df.loc[sampling_df.year == 1996, 'sensor'] = 'AT1 & AT2'
sampling_df.loc[sampling_df.year == 2002, 'sensor'] = 'AT2 & ATS'
# compute the annual mean for each flare
to_group = ['lats_arcmin', 'lons_arcmin', 'year', 'sensor']
to_agg = {'frp': np.sum,
'sample_counts_flare': np.sum,
'lats': np.mean,
'lons': np.mean}
annual_summed_flare_df = flare_df.groupby(to_group, as_index=False).agg(to_agg)
# get the cloud cover adjusted sampling
to_group = ['lats_arcmin', 'lons_arcmin', 'year', 'sensor']
to_agg = {'cloud_cover': np.mean,
'sample_counts_all': np.sum}
annual_sample_df = sampling_df.groupby(to_group, as_index=False).agg(to_agg)
# merge and get cloud-adjusted sampling (i.e. the expected number of observations)
annual_frp_df = pd.merge(annual_summed_flare_df, annual_sample_df, on=to_group)
annual_frp_df['cloud_adjusted_sample_counts'] = annual_frp_df.sample_counts_all * (1-annual_frp_df.cloud_cover)
# if expected less than actual, then set to actual number of observations
mask = annual_frp_df['cloud_adjusted_sample_counts'] < annual_frp_df['sample_counts_flare']
annual_frp_df.loc[mask, 'cloud_adjusted_sample_counts'] = annual_frp_df.loc[mask, 'sample_counts_flare']
# compute mean frp
annual_frp_df['cloud_adjusted_mean_frp'] = annual_frp_df.frp / annual_frp_df.cloud_adjusted_sample_counts
# merge countries
annual_frp_df = annual_frp_df.merge(countries_df[['countries', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])
# sum on a per country basis
grouped_annual_frp_by_sensor = annual_frp_df.groupby(['year', 'countries', 'sensor'], as_index=False).agg({'cloud_adjusted_mean_frp': np.sum})
return grouped_annual_frp, grouped_annual_frp_by_sensor, annual_frp_df
In [404]:
ats_annual_frp_by_country, ats_annual_frp_by_country_with_sensor, ats_annual_frp_df = frp_by_country(ats_flare_df,
ats_flare_sampling_df,
ats_countries_df)
sls_annual_frp_by_country, sls_annual_frp_by_country_with_sensor, sls_annual_frp_df = frp_by_country(sls_flare_df_annum,
sls_flare_sampling_df_annum,
sls_countries_df)
In [801]:
sls_annual_frp_df.head()
Out[801]:
In [805]:
sls_annual_frp_df[sls_annual_frp_df.countries == "United States"].cloud_adjusted_sample_counts.mean()
Out[805]:
We have cloud-adjusted annual FRP for each valid flare. We can use this information to create an "adjusted" monthly visualisation in which the yearly FRP is distributed over the observations, based on the observations and their weighting. This doesn't fully represent the true activity of the flare, but rather the assumed activity of the flare if it were operating continuously, which is not the case. But given the observational limitations we cannot fully determine up-time, and this estimate is the best we can get.
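A minimal toy illustration of the weighting for a single flare (assumed numbers only, not taken from the data):
In [ ]:
# toy example: a flare seen in three months of the year
toy = pd.DataFrame({'month': [3, 7, 11], 'frp': [10.0, 30.0, 60.0]})
toy['frp_weight'] = toy.frp / toy.frp.sum()           # 0.1, 0.3, 0.6
toy_yearly_fre = 5000.0                               # assumed cloud-adjusted annual FRE for this flare
toy['monthly_fre'] = toy.frp_weight * toy_yearly_fre  # 500, 1500, 3000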
In [406]:
# Get monthly mean FRP for each flare (not normalised)
ats_monthly_frp_unnormalised = ats_flare_df.groupby(['lats_arcmin', 'lons_arcmin', 'year', 'month'], as_index=False).agg({'lats': np.mean, 'lons':np.mean, 'frp':np.mean})
# Sum monthly means to get yearly total of means and rename and merge onto monthly df
ats_yearly_frp_unnormalised = ats_monthly_frp_unnormalised.groupby(['lats_arcmin', 'lons_arcmin', 'year'], as_index=False).agg({'frp': np.sum})
ats_yearly_frp_unnormalised.rename(columns={'frp':'annual_frp'}, inplace=True)
ats_monthly_frp_unnormalised = ats_monthly_frp_unnormalised.merge(ats_yearly_frp_unnormalised[['annual_frp', 'year', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin', 'year'])
# determine the contribution of each month to the yearly total to generate a weight
ats_monthly_frp_unnormalised['frp_weight'] = ats_monthly_frp_unnormalised.frp / ats_monthly_frp_unnormalised.annual_frp
# Determine the total FRE for each flare from the normalised dataset
ats_annual_frp_df['yearly_fre'] = ats_annual_frp_df.cloud_adjusted_mean_frp * 3.154e+7 / 1000000 # multiply by seconds in one year to get FRE, then divide by a million to get MJ
# Merge the FRE annual totals onto the monthly dataset and distribute across the months using the weights
ats_monthly_frp_unnormalised = ats_monthly_frp_unnormalised.merge(ats_annual_frp_df[['yearly_fre', 'year', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin', 'year'])
ats_monthly_frp_unnormalised['monthly_fre'] = ats_monthly_frp_unnormalised.frp_weight * ats_monthly_frp_unnormalised.yearly_fre
In [666]:
# assess difference in sampling between sensors
annual_sampling = ats_hotspot_sampling_df.groupby(['lats_arcmin', 'lons_arcmin', 'year'], as_index=False).agg({'sample_counts_all':np.sum,
'sensor':np.unique})
In [667]:
annual_sampling = annual_sampling[~((annual_sampling.year == 1996) | (annual_sampling.year==2002))]
In [669]:
sensor_sampling = annual_sampling.groupby(['lats_arcmin', 'lons_arcmin', 'sensor'], as_index=False).agg({'sample_counts_all':np.median})
In [681]:
at1_sampling = sensor_sampling[sensor_sampling.sensor == 'at1']
at2_sampling = sensor_sampling[sensor_sampling.sensor == 'at2']
ats_sampling = sensor_sampling[sensor_sampling.sensor == 'ats']
In [692]:
at1_sampling.rename(columns={"sample_counts_all":"sample_counts_at1"}, inplace=True)
at2_sampling.rename(columns={"sample_counts_all":"sample_counts_at2"}, inplace=True)
ats_sampling.rename(columns={"sample_counts_all":"sample_counts_ats"}, inplace=True)
In [705]:
at1_ats_sampling = at1_sampling.merge(ats_sampling, on=['lats_arcmin', 'lons_arcmin'])
at1_at2_sampling = at1_sampling.merge(at2_sampling, on=['lats_arcmin', 'lons_arcmin'])
at2_ats_sampling = at2_sampling.merge(ats_sampling, on=['lats_arcmin', 'lons_arcmin'])
In [707]:
plt.plot(at1_ats_sampling.sample_counts_at1, at1_ats_sampling.sample_counts_ats, 'r.')
plt.show()
plt.plot(at1_at2_sampling.sample_counts_at1, at1_at2_sampling.sample_counts_at2, 'r.')
plt.show()
plt.plot(at2_ats_sampling.sample_counts_at2, at2_ats_sampling.sample_counts_ats, 'r.')
plt.show()
In [708]:
odd_points = at1_ats_sampling[(at1_ats_sampling.sample_counts_at1 < 100) & (at1_ats_sampling.sample_counts_ats > 100)]
In [712]:
plt.plot(odd_points.lons_arcmin/100, odd_points.lats_arcmin/100, 'r.')
plt.show()
In [713]:
odd_points = at2_ats_sampling[(at2_ats_sampling.sample_counts_at2 < 100) & (at2_ats_sampling.sample_counts_ats > 100)]
In [725]:
plt.plot(odd_points.lons_arcmin/100, odd_points.lats_arcmin/100, 'r.')
plt.show()
In [638]:
# generate plots for gif generation ATS
years = np.arange(1994, 2012, 1)
months = np.arange(1,13, 1)
bin_x = np.arange(-180, 182, 1)
bin_y = np.arange(-90, 92, 1)
meshed_x, meshed_y = np.meshgrid(bin_x, bin_y)
meshed_x = meshed_x[:-1,:-1] +0.5
meshed_y = meshed_y[:-1,:-1] +0.5
for y in years:
for m in months:
plt.close('all')
sub_df = ats_hotspot_sampling_df[(ats_hotspot_sampling_df.year == y) &
(ats_hotspot_sampling_df.month == m)]
try:
binned_data = stats.binned_statistic_2d(sub_df.lons_arcmin/100, sub_df.lats_arcmin/100,
np.ones(sub_df.lons_arcmin.size), 'sum',
bins=[bin_x, bin_y]).statistic.T
except:
continue
mask = binned_data ==0
binned_data = np.log10(binned_data)
binned_data = np.ma.masked_array(binned_data, mask)
# set up figure
plt.close('all')
fig = plt.figure(figsize=(20,15))
fig.subplots_adjust(hspace=0.001, wspace=0.1)
ax1 = plt.subplot(1, 1, 1, projection=ccrs.EqualEarth())
ax_list = [ax1]
for i, ax in enumerate(ax_list):
ax.set_global()
ax.outline_patch.set_edgecolor('black')
ax.coastlines(color='black', linewidth=0.75)
ax.gridlines(color='black', linewidth=0.75, alpha=0.5)
ax1.text(0.94, 0.92, "year: " + str(y), transform=ax1.transAxes, fontsize=25)
ax1.text(0.94, 0.87, "month: " + str(m).zfill(2), transform=ax1.transAxes, fontsize=25)
p = ax1.pcolormesh(meshed_x, meshed_y, binned_data, cmap='Reds', transform=ccrs.PlateCarree(), vmax=3)
cbaxes1 = fig.add_axes([0.315, 0.2, 0.4, 0.02])
cb1 = plt.colorbar(p, cax=cbaxes1, orientation="horizontal")
cb1.set_label('Log Count', fontsize=18)
plt.savefig('figs/sampling/' + str(y)+str(m).zfill(2)+'.png', bbox_inches='tight')
#plt.show()
In [407]:
# generate plots for gif generation ATS
years = np.arange(1994, 2012, 1)
months = np.arange(1,13, 1)
bin_x = np.arange(-180, 182, 2)
bin_y = np.arange(-90, 92, 2)
meshed_x, meshed_y = np.meshgrid(bin_x, bin_y)
meshed_x = meshed_x[:-1,:-1] +1
meshed_y = meshed_y[:-1,:-1] +1
for y in years:
for m in months:
plt.close('all')
sub_df = ats_monthly_frp_unnormalised[(ats_monthly_frp_unnormalised.year == y) &
(ats_monthly_frp_unnormalised.month == m)]
try:
binned_data = stats.binned_statistic_2d(sub_df.lons, sub_df.lats,
sub_df.frp, 'sum',
bins=[bin_x, bin_y])
binned_data = binned_data.statistic.T
except:
continue
mask = binned_data > 0
binned_data = binned_data[mask]
lons = meshed_x[mask]
lats = meshed_y[mask]
# set up figure
plt.close('all')
fig = plt.figure(figsize=(20,15))
fig.subplots_adjust(hspace=0.001, wspace=0.1)
ax1 = plt.subplot(1, 1, 1, projection=ccrs.EqualEarth())
ax_list = [ax1]
for i, ax in enumerate(ax_list):
ax.set_global()
ax.outline_patch.set_edgecolor('black')
ax.coastlines(color='black', linewidth=0.75)
ax.gridlines(color='black', linewidth=0.75, alpha=0.5)
ax1.text(0.94, 0.92, "year: " + str(y), transform=ax1.transAxes, fontsize=25)
ax1.text(0.94, 0.87, "month: " + str(m).zfill(2), transform=ax1.transAxes, fontsize=25)
scat = ax1.scatter(lons,lats,transform=ccrs.PlateCarree(),
s=binned_data, lw=0.5,
c=binned_data,
cmap='plasma',
vmax=1000,
alpha=0.65)
cbaxes1 = fig.add_axes([0.315, 0.2, 0.4, 0.02])
cb1 = plt.colorbar(scat, cax=cbaxes1, orientation="horizontal")
cb1.set_label('FRE (MJ)', fontsize=18)
plt.savefig('figs/' + str(y)+str(m).zfill(2)+'.png', bbox_inches='tight')
#plt.show()
In [408]:
# Get monthly mean FRP for each flare (not normalised)
sls_monthly_frp_unnormalised = sls_flare_df.groupby(['lats_arcmin', 'lons_arcmin', 'year', 'month'], as_index=False).agg({'lats': np.mean, 'lons':np.mean, 'frp':np.mean})
# Sum monthly means to get yearly total of means and rename and merge onto monthly df
sls_yearly_frp_unnormalised = sls_monthly_frp_unnormalised.groupby(['lats_arcmin', 'lons_arcmin', 'year'], as_index=False).agg({'frp': np.sum})
sls_yearly_frp_unnormalised.rename(columns={'frp':'annual_frp'}, inplace=True)
sls_monthly_frp_unnormalised = sls_monthly_frp_unnormalised.merge(sls_yearly_frp_unnormalised[['annual_frp', 'year', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin', 'year'])
# determine the contribution of each month to the yearly total to generate a weight
sls_monthly_frp_unnormalised['frp_weight'] = sls_monthly_frp_unnormalised.frp / sls_monthly_frp_unnormalised.annual_frp
# Determine the total FRE for each flare from the normalised dataset
sls_annual_frp_df['yearly_fre'] = sls_annual_frp_df.cloud_adjusted_mean_frp * 3.154e+7 / 1000000 # multiply by seconds in one year to get FRE, then divide by a million to get MJ
# Merge the FRE annual totals onto the monthly dataset and distribute across the months using the weights
sls_monthly_frp_unnormalised = sls_monthly_frp_unnormalised.merge(sls_annual_frp_df[['yearly_fre', 'year', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin', 'year'])
sls_monthly_frp_unnormalised['monthly_fre'] = sls_monthly_frp_unnormalised.frp_weight * sls_monthly_frp_unnormalised.yearly_fre
In [409]:
# generate plots for gif generation SLS
years = np.arange(2016, 2019, 1)
months = np.arange(1,13, 1)
bin_x = np.arange(-180, 182, 2)
bin_y = np.arange(-90, 92, 2)
meshed_x, meshed_y = np.meshgrid(bin_x, bin_y)
meshed_x = meshed_x[:-1,:-1] +1
meshed_y = meshed_y[:-1,:-1] +1
for y in years:
for m in months:
plt.close('all')
sub_df = sls_monthly_frp_unnormalised[(sls_monthly_frp_unnormalised.year == y) &
(sls_monthly_frp_unnormalised.month == m)]
try:
binned_data = stats.binned_statistic_2d(sub_df.lons, sub_df.lats,
sub_df.frp, 'sum',
bins=[bin_x, bin_y])
binned_data = binned_data.statistic.T
except:
continue
mask = binned_data > 0
binned_data = binned_data[mask]
lons = meshed_x[mask]
lats = meshed_y[mask]
# set up figure
plt.close('all')
fig = plt.figure(figsize=(20,15))
fig.subplots_adjust(hspace=0.001, wspace=0.1)
ax1 = plt.subplot(1, 1, 1, projection=ccrs.EqualEarth())
ax_list = [ax1]
for i, ax in enumerate(ax_list):
ax.set_global()
ax.outline_patch.set_edgecolor('black')
ax.coastlines(color='black', linewidth=0.75)
ax.gridlines(color='black', linewidth=0.75, alpha=0.5)
ax1.text(0.94, 0.92, "year: " + str(y), transform=ax1.transAxes, fontsize=25)
ax1.text(0.94, 0.87, "month: " + str(m).zfill(2), transform=ax1.transAxes, fontsize=25)
scat = ax1.scatter(lons,lats,transform=ccrs.PlateCarree(),
s=binned_data, lw=0.5,
c=binned_data,
cmap='plasma',
vmax=1000,
alpha=0.65)
cbaxes1 = fig.add_axes([0.315, 0.2, 0.4, 0.02])
cb1 = plt.colorbar(scat, cax=cbaxes1, orientation="horizontal")
cb1.set_label('FRE (MJ)', fontsize=18)
plt.savefig('figs/' + str(y)+str(m).zfill(2)+'.png', bbox_inches='tight')
#plt.show()
In [410]:
df_with_bcm = ats_annual_frp_by_country_with_sensor.copy()
df_with_bcm = ats_annual_frp_by_country.merge(bcm_df, on=['countries', 'year'])
df_with_bcm.dropna(inplace=True)
# get rid of end years
df_with_bcm = df_with_bcm[df_with_bcm.year != 2012]
df_with_bcm = df_with_bcm[df_with_bcm.year != 1991]
df_with_bcm = df_with_bcm[df_with_bcm.year != 1992]
df_with_bcm = df_with_bcm[df_with_bcm.year != 2001]
df_with_bcm = df_with_bcm[df_with_bcm.year != 2002]
# russia and nigeria
df_with_bcm_rn = df_with_bcm.copy()
df_with_bcm_rn = df_with_bcm_rn[(df_with_bcm_rn.countries == 'Russia') |
(df_with_bcm_rn.countries == 'Nigeria')]
# all other countries
df_with_bcm_not_rn = df_with_bcm.copy()
df_with_bcm_not_rn = df_with_bcm_not_rn[(df_with_bcm_not_rn.countries != 'Russia') &
(df_with_bcm_not_rn.countries != 'Nigeria')]
In [411]:
plt.close('all')
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 9))
h = ax.hexbin(df_with_bcm.cloud_adjusted_mean_frp, df_with_bcm.bcm, gridsize=50, bins='log', mincnt=1,
cmap='rainbow', linewidths=0.5, edgecolors='k')
cbar = plt.colorbar(mappable=h).set_label('Log Sample Count', fontsize=20)
sns.regplot(df_with_bcm_rn.cloud_adjusted_mean_frp, df_with_bcm_rn.bcm, ci=68, color='k', ax=ax, scatter=False)
sns.regplot(df_with_bcm_not_rn.cloud_adjusted_mean_frp, df_with_bcm_not_rn.bcm, ci=68, color='k', ax=ax, scatter=False)
ax.set(ylabel='DMSP Annual Flare Gas Volume (BCM)', xlabel='ATSR Annual Normalised FRP (MW)')
ax.yaxis.label.set_size(20)
ax.xaxis.label.set_size(20)
slope0, intercept0, r_value0, _, _ = scipy.stats.linregress(df_with_bcm_rn.cloud_adjusted_mean_frp,
df_with_bcm_rn.bcm)
slope1, intercept1, r_value1, _, _ = scipy.stats.linregress(df_with_bcm_not_rn.cloud_adjusted_mean_frp,
df_with_bcm_not_rn.bcm)
textstr0 = '$R^2=%.3f$\n$y=%.2fx + %.2f$\n Samples$=%.0f$' % (r_value0**2, slope0, intercept0, df_with_bcm_rn.bcm.shape[0])
textstr1 = '$R^2=%.3f$\n$y=%.2fx + %.2f$\n Samples$=%.0f$' % (r_value1**2, slope1, intercept1, df_with_bcm_not_rn.bcm.shape[0])
props = dict(boxstyle='round', facecolor='white', alpha=0.5)
ax.text(0.6, 0.9, textstr0, transform=ax.transAxes, fontsize=14,
verticalalignment='top', bbox=props)
ax.text(0.7, 0.22, textstr1, transform=ax.transAxes, fontsize=14,
verticalalignment='top', bbox=props)
plt.savefig(os.path.join(output_path, 'dmsp_vs_ats.png'), bbox_inches='tight', dpi=600)
plt.show()
In [412]:
# reduce the data to only complete years
ats_with_sensor_df = ats_annual_frp_by_country_with_sensor.copy()
sls_with_sensor_df = sls_annual_frp_by_country_with_sensor.copy()
# for yr in [1991, 2012, 2016]:
# ats_with_sensor_df = ats_with_sensor_df[ats_with_sensor_df.year != yr]
# sls_with_sensor_df = sls_with_sensor_df[sls_with_sensor_df.year != yr]
# combine the dataframes
combined_with_sensor_df = pd.concat([ats_with_sensor_df, sls_with_sensor_df])
# get the global annual FRP values
global_frp = combined_with_sensor_df.groupby(['sensor', 'year'], as_index=False).agg({'cloud_adjusted_mean_frp': np.sum})
global_frp.cloud_adjusted_mean_frp /= 1000 # convert from MW to GW
global_frp = global_frp.pivot('year', 'sensor', 'cloud_adjusted_mean_frp')
global_frp.columns.name = None
global_frp.rename(columns={'at1': 'AT1', 'at2':'AT2', 'ats':'ATS', 'sls':'SLS'}, inplace=True)
global_frp = global_frp[['AT1', 'AT1 & AT2', 'AT2', 'AT2 & ATS', 'ATS', 'SLS']]
# now aggregate to remove sensor information from screened data
to_group = ['countries', 'year']
to_agg = {'cloud_adjusted_mean_frp': np.sum}
combined_df = combined_with_sensor_df.groupby(to_group, as_index=False).agg(to_agg)
combined_df.cloud_adjusted_mean_frp /= 1000
# find the top n flaring countries
n_countries = 30
top_flaring_countries = combined_df.groupby('countries').agg({'cloud_adjusted_mean_frp': np.sum}).sort_values('cloud_adjusted_mean_frp', ascending=False).head(n_countries).reset_index()
# reduce to the top countries
top_flaring_activity = combined_df.merge(top_flaring_countries[['countries']], on=['countries'])
# pivoted
pivoted_df = top_flaring_activity.pivot('year', 'countries', 'cloud_adjusted_mean_frp')
df_list = []
for c in top_flaring_countries.countries.values:
df_list.append(pivoted_df[c])
top_flaring_activity = pd.concat(df_list, axis=1)
In [413]:
plt.close('all')
#fig, ax = plt.figure(figsize=(18,7))
fig, (ax0, ax1) = plt.subplots(nrows=2, ncols=1,
gridspec_kw={'height_ratios': [2, 7.5]},
figsize=(15, 12))
sns.heatmap(global_frp.T, linewidths=1,
robust=True, annot=True,
fmt='.1f', annot_kws={'fontsize':13},
cbar=False,
cmap='Greys_r',
ax=ax0)
ax0.set(xlabel='', ylabel='Global')
ax0.yaxis.label.set_size(20)
ax0.set_xticklabels(ax0.get_xticklabels(), fontsize=12)
ax0.set_yticklabels(ax0.get_yticklabels(), fontsize=12, rotation=0)
sns.heatmap(top_flaring_activity.T, linewidths=1,
robust=True, annot=True,
fmt='.2f', annot_kws={'fontsize':13},
cbar=False,
vmin=-0.3, vmax=2,
ax=ax1)
ax1.set(xlabel='Year', ylabel='Country')
ax1.yaxis.label.set_size(20)
ax1.xaxis.label.set_size(20)
ax1.set_xticklabels(ax1.get_xticklabels(), fontsize=12)
ax1.set_yticklabels(ax1.get_yticklabels(), fontsize=12)
plt.savefig(os.path.join(output_path, 'ats_frp.png'), bbox_inches='tight', dpi=600)
plt.show()
In [414]:
# reduce the data to only complete years
ats_with_sensor_df = ats_annual_frp_by_country_with_sensor.copy()
sls_with_sensor_df = sls_annual_frp_by_country_with_sensor.copy()
# for yr in [1991, 2012, 2016]:
# ats_with_sensor_df = ats_with_sensor_df[ats_with_sensor_df.year != yr]
# sls_with_sensor_df = sls_with_sensor_df[sls_with_sensor_df.year != yr]
# combine the dataframes
combined_with_sensor_df = pd.concat([ats_with_sensor_df, sls_with_sensor_df])
# get the global annual FRP values
global_frp = combined_with_sensor_df.groupby(['sensor', 'year'], as_index=False).agg({'cloud_adjusted_mean_frp': np.sum})
global_frp.cloud_adjusted_mean_frp /= 1000 # convert from MW to GW
global_frp = global_frp.pivot('year', 'sensor', 'cloud_adjusted_mean_frp')
global_frp.columns.name = None
global_frp.rename(columns={'at1': 'AT1', 'at2':'AT2', 'ats':'ATS', 'sls':'SLS'}, inplace=True)
global_frp = global_frp[['AT1', 'AT1 & AT2', 'AT2', 'AT2 & ATS', 'ATS', 'SLS']]
# now aggregate to remove sensor information from screened data
to_group = ['countries', 'year']
to_agg = {'cloud_adjusted_mean_frp': np.sum}
combined_df = combined_with_sensor_df.groupby(to_group, as_index=False).agg(to_agg)
combined_df.cloud_adjusted_mean_frp /= 1000
# find the top n flaring countries
n_countries = 30
top_flaring_countries = combined_df.groupby('countries').agg({'cloud_adjusted_mean_frp': np.sum}).sort_values('cloud_adjusted_mean_frp', ascending=False).head(n_countries).reset_index()
# reduce to the top countries
top_flaring_activity = combined_df.merge(top_flaring_countries[['countries']], on=['countries'])
# pivoted
pivoted_df = top_flaring_activity.pivot('year', 'countries', 'cloud_adjusted_mean_frp')
df_list = []
for c in top_flaring_countries.countries.values:
df_list.append(pivoted_df[c])
top_flaring_activity = pd.concat(df_list, axis=1)
In [415]:
global_frp_plot = combined_with_sensor_df.groupby(['year'], as_index=True).agg({'cloud_adjusted_mean_frp': np.sum})
global_frp_plot.cloud_adjusted_mean_frp /= 1000 # convert from MW to GW
pc_plot_frp = pd.DataFrame(top_flaring_activity.values / global_frp_plot.values * 100,
columns=top_flaring_activity.T.index,
index=top_flaring_activity.index)
In [416]:
# sort out diff plot showing only years with reasonable sampling
diff_plot = pc_plot_frp.copy(deep=True)
diff_plot.drop([1991,1992,2012], axis=0, inplace=True)
diff_plot = diff_plot - diff_plot.shift()
diff_plot.loc[1991] = np.nan
diff_plot.loc[1992] = np.nan
diff_plot.loc[2012] = np.nan
diff_plot.sort_index(inplace=True)
In [417]:
plt.close('all')
#fig, ax = plt.figure(figsize=(18,7))
fig, (ax0, ax1, ax2) = plt.subplots(nrows=3, ncols=1,
gridspec_kw={'height_ratios': [2, 7.5, 7.5]},
figsize=(15, 22))
sns.heatmap(global_frp.T, linewidths=1,
robust=True, annot=True,
annot_kws={'fontsize':13},
fmt='.1f',
cbar=False,
cmap='Greys_r',
vmax=18,
ax=ax0)
ax0.set(xlabel='', ylabel='a) Global FRP (GW)')
ax0.yaxis.label.set_size(20)
ax0.set_xticklabels(ax0.get_xticklabels(), fontsize=12)
ax0.set_yticklabels(ax0.get_yticklabels(), fontsize=12, rotation=0)
sns.heatmap(pc_plot_frp.T, linewidths=1,
robust=True, annot=True,
annot_kws={'fontsize':13},
fmt='.1f',
cbar=False,
cmap='viridis',
vmax = 18,
ax=ax1)
ax1.set(xlabel='', ylabel='b) Fraction of Global FRP (%)')
ax1.yaxis.label.set_size(20)
ax1.set_yticklabels(ax1.get_yticklabels(), fontsize=12)
sns.heatmap(diff_plot.T, linewidths=1,
robust=True, annot=True,
annot_kws={'fontsize':13},
fmt='.1f',
cbar=False,
cmap='coolwarm',
vmax=4, vmin=-4,
ax=ax2)
ax2.set(xlabel='Year', ylabel='c) Annual Change in Fraction of Global FRP (%)')
ax2.yaxis.label.set_size(20)
ax2.xaxis.label.set_size(20)
ax2.set_xticklabels(ax2.get_xticklabels(), fontsize=12)
ax2.set_yticklabels(ax2.get_yticklabels(), fontsize=12)
plt.savefig(os.path.join(output_path, 'flare_frp_analysis.png'), bbox_inches='tight', dpi=600)
plt.show()
In [750]:
pc_plot_frp.sum(axis=1)
Out[750]:
In [418]:
frp_global_pc_change = combined_with_sensor_df.groupby(['sensor', 'year'], as_index=False).agg({'cloud_adjusted_mean_frp': np.sum})
frp_global_pc_change = frp_global_pc_change.sort_values('year')
frp_global_pc_change['pc_change'] = (frp_global_pc_change.cloud_adjusted_mean_frp / frp_global_pc_change.cloud_adjusted_mean_frp.shift() - 1)
frp_global_pc_change.drop('cloud_adjusted_mean_frp', axis=1, inplace=True)
frp_global_pc_change = frp_global_pc_change.pivot('year', 'sensor', 'pc_change')
frp_global_pc_change.rename(columns={'at1': 'AT1', 'at2':'AT2', 'ats':'ATS', 'sls':'SLS'}, inplace=True)
frp_global_pc_change = frp_global_pc_change[['AT1', 'AT1 & AT2', 'AT2', 'AT2 & ATS', 'ATS', 'SLS']]
frp_country_pc_change = (top_flaring_activity / top_flaring_activity.shift() - 1)
for yr in [1992]:
frp_global_pc_change = frp_global_pc_change[frp_global_pc_change.index != yr]
frp_country_pc_change = frp_country_pc_change[frp_country_pc_change.index != yr]
In [419]:
#frp_global_pc_change = frp_global_pc_change[frp_global_pc_change.index >= 2003]
#frp_global_pc_change = frp_global_pc_change.T[(frp_global_pc_change.T.index == u'ATS') |
# (frp_global_pc_change.T.index == u'SLS')].T
#frp_country_pc_change = frp_country_pc_change[frp_country_pc_change.index >= 2003]
In [420]:
plt.close('all')
#fig, ax = plt.figure(figsize=(18,7))
fig, (ax0, ax1) = plt.subplots(nrows=2, ncols=1,
gridspec_kw={'height_ratios': [2, 7.5]},
figsize=(15, 12))
sns.heatmap(frp_global_pc_change.T*100, linewidths=1,
robust=True, annot=True,
fmt='.0f', annot_kws={'fontsize':13},
cbar=False,
cmap='coolwarm',
vmax=10, vmin=-10,
ax=ax0)
ax0.set(xlabel='', ylabel='Global')
ax0.yaxis.label.set_size(20)
ax0.set_xticklabels(ax0.get_xticklabels(), fontsize=12)
ax0.set_yticklabels(ax0.get_yticklabels(), fontsize=12, rotation=0)
sns.heatmap(frp_country_pc_change.T*100, linewidths=1,
robust=True, annot=True,
fmt='.0f', annot_kws={'fontsize':13},
cbar=False,
cmap='coolwarm',
vmax=50, vmin=-50,
ax=ax1)
ax1.set(xlabel='Year', ylabel='Country')
ax1.yaxis.label.set_size(20)
ax1.xaxis.label.set_size(20)
ax1.set_xticklabels(ax1.get_xticklabels(), fontsize=12)
ax1.set_yticklabels(ax1.get_yticklabels(), fontsize=12)
plt.savefig(os.path.join(output_path, 'ats_frp_change.png'), bbox_inches='tight', dpi=600)
plt.show()
In [421]:
ats_with_sensor_df.head()
Out[421]:
In [422]:
counts = global_activity.sum(axis=1)
frp = global_frp.sum(axis=1)
In [423]:
plt.close('all')
plt.plot(counts.index, frp.values / counts.values, 'r.')
plt.show()
In [424]:
ats_flare_df_with_countries = ats_flare_df.merge(ats_countries_df[['countries', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])
In [425]:
sls_flare_df_with_countries = sls_flare_df.merge(sls_countries_df[['countries', 'lats_arcmin', 'lons_arcmin']], on=['lats_arcmin', 'lons_arcmin'])
In [426]:
ats_flare_df_uk_1994 = ats_flare_df_with_countries[(ats_flare_df_with_countries.countries == 'United Kingdom') &
(ats_flare_df_with_countries.year == 1994)]
In [427]:
ats_flare_df_uk_1994[['year', 'month', 'day', 'hhmm', 'frp', 'lats', 'lons']].sort_values('frp', ascending=False).head(20)
Out[427]:
In [428]:
ats_flare_df_malaysia_2011 = ats_flare_df_with_countries[(ats_flare_df_with_countries.countries == 'Malaysia') &
(ats_flare_df_with_countries.year == 2011)]
In [429]:
ats_flare_df_malaysia_2011[['year', 'month', 'day', 'hhmm', 'frp', 'lats', 'lons']].sort_values('frp', ascending=False).head(20)
Out[429]:
In [430]:
ats_flare_df_indonesia_2011 = ats_flare_df_with_countries[(ats_flare_df_with_countries.countries == 'Indonesia') &
(ats_flare_df_with_countries.year == 2011)]
In [431]:
ats_flare_df_indonesia_2011[['year', 'month', 'day', 'hhmm', 'frp', 'lats', 'lons']].sort_values('frp', ascending=False).head(20)
Out[431]:
In [432]:
ats_flare_df_russia_2002 = ats_flare_df_with_countries[(ats_flare_df_with_countries.countries == 'Russia') &
(ats_flare_df_with_countries.year == 2002)]
In [433]:
ats_flare_df_russia_2002[['year', 'month', 'day', 'hhmm', 'frp', 'lats', 'lons']].sort_values('frp', ascending=False).head(20)
Out[433]:
In [434]:
sls_flare_df_venezuela = sls_flare_df_with_countries[(sls_flare_df_with_countries.countries == 'Venezuela')]
sls_flare_df_venezuela[['year', 'month', 'day', 'hhmm', 'frp', 'lats', 'lons']].sort_values('frp', ascending=False).head(50)
Out[434]:
In [435]:
sls_flare_df_us = sls_flare_df_with_countries[(sls_flare_df_with_countries.countries == 'United States')]
sls_flare_df_us[['year', 'month', 'day', 'hhmm', 'frp', 'lats', 'lons']].sort_values('frp', ascending=False).head(20)
Out[435]:
In [436]:
sls_flare_df_iraq = sls_flare_df_with_countries[(sls_flare_df_with_countries.countries == 'Iraq')]
sls_flare_df_iraq[['year', 'month', 'day', 'hhmm', 'frp', 'lats', 'lons']].sort_values('frp', ascending=False).head(20)
Out[436]:
In [437]:
sls_flare_df_iran = sls_flare_df_with_countries[(sls_flare_df_with_countries.countries == 'Iran')]
sls_flare_df_iran[['year', 'month', 'day', 'hhmm', 'frp', 'lats', 'lons']].sort_values('frp', ascending=False).head(20)
Out[437]:
This map shows each flare's emissions over the entire ATSR time series. We use the screened data (from the screening process above), and the FRP values are the cloud-adjusted ones, so the persistence and typical cloud cover of each flare are taken into account when computing the average.
This is currently showing the mean over the lifetime. Change it back to the mean over each year, multiply by the time in the year, and then group over years to get flare activity (a minimal sketch of this change is given below, after the per-year FRP cell).
In [438]:
def myround(x, base=0.5):
return base * np.round(x/base)
def plot_total_frp(frp, binned_stat, top_flares):
plt.close('all')
fig = plt.figure(figsize=(20,9))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.set_xlim(( -180, 180))
ax.set_ylim((-90, 90))
ax.coastlines(color='white', linewidth=0.75)
img_extent = (-180, 180, -90, 90)  # define the map extent before its first use
ax.imshow(np.tile(np.array([[[0,0,0]]],
dtype=np.uint8), [2, 2, 1]),
origin='upper',
transform=ccrs.PlateCarree(),
extent=img_extent)
#ax.text(0.94, 0.92, lab, transform=ax.transAxes, fontsize=25, color='w')
scat = ax.scatter(frp_df.lons, frp_df.lats,
s=0.05,
color='cyan',
alpha=0.2,
label='Flare Locations')
scat2 = ax.scatter(top_flares.lons, top_flares.lats,
s=55,
facecolor='chartreuse',
edgecolor='k',
alpha=1,
label='10% Flare Locations')
leg0 = ax.legend(loc = 3, scatterpoints = 1, prop={'size': 15,})
leg0.get_frame().set_facecolor('k')
leg0.legendHandles[0]._sizes = [40]
leg0.legendHandles[0].set_alpha(1)
leg0.legendHandles[1]._sizes = [110]
leg0.legendHandles[1].set_alpha(1)
plt.setp(leg0.get_texts(), color='w')
img_extent = (-180, 180, -90, 90)
im = ax.imshow(binned_stat, origin='upper', cmap='inferno', norm=LogNorm(vmin=10, vmax=1000), alpha=1,
extent=img_extent, transform=ccrs.PlateCarree())
cbar = plt.colorbar(mappable=im).set_label('Log BCM', fontsize=20)
#-------------------
# SUBPLOTS
#-------------------
# set up the subaxes and locations
sub_x_extent = [(46, 53), (5,25), (4.5, 13.5)]
sub_y_extent = [(25, 33), (27,33), (-9, 7)]
rects = [[0.43, 0.2, 0.3, 0.25], [0.27, 0.68, 0.15, 0.2], [-0.12, 0.27, 0.6, 0.4]]
locs = [(1,2), (3,4), (1,4)]
# add in zoomed plots for rois
for sub_x, sub_y, rect, loc in zip(sub_x_extent, sub_y_extent, rects, locs):
sub_ax = fig.add_axes(rect,
projection=ccrs.PlateCarree(), )
sub_ax.set_xlim(sub_x)
sub_ax.set_ylim(sub_y)
sub_ax.outline_patch.set_edgecolor('white')
# show inset loc
mark_inset(ax, sub_ax, loc1=loc[0], loc2=loc[1], fc="none", lw=1, ec='w')
# add background
sub_ax.coastlines(color='white', linewidth=0.75)
# country_boundaries = feature.NaturalEarthFeature(category='cultural',
# name='admin_0_countries',
# scale='10m', facecolor='none')
# sub_ax.add_feature(country_boundaries, edgecolor='white')
sub_ax.imshow(np.tile(np.array([[[0,0,0]]],
dtype=np.uint8), [2, 2, 1]),
origin='upper',
transform=ccrs.PlateCarree(),
extent=sub_x + sub_y)
sub_scat = sub_ax.scatter(frp_df.lons, frp_df.lats,
s=0.05,
color='cyan',
alpha=0.2)
sub_scat2 = sub_ax.scatter(top_flares.lons, top_flares.lats,
s=55,
facecolor='chartreuse',
edgecolor='k',
alpha=1)
#plt.savefig(os.path.join(output_path, 'flare_map.png'), bbox_inches='tight', dpi=600)
plt.show()
In [439]:
# compute the annual mean for each flare
to_group = ['lats_arcmin', 'lons_arcmin', 'year']
to_agg = {'frp': np.sum,
'sample_counts_flare': np.sum,
'lats': np.mean,
'lons': np.mean}
summed_flare_df = ats_flare_df.groupby(to_group, as_index=False).agg(to_agg)
# get the cloud cover adjusted sampling
to_group = ['lats_arcmin', 'lons_arcmin', 'year']
to_agg = {'cloud_cover': np.mean,
'sample_counts_all': np.sum}
summed_sample_df = ats_flare_sampling_df.groupby(to_group, as_index=False).agg(to_agg)
# merge and get cloud-adjusted sampling (i.e. the expected number of observations)
frp_df = pd.merge(summed_flare_df, summed_sample_df, on=to_group)
frp_df['cloud_adjusted_sample_counts'] = frp_df.sample_counts_all * (1-frp_df.cloud_cover)
# if expected less than actual, then set to actual number of observations
mask = frp_df['cloud_adjusted_sample_counts'] < frp_df['sample_counts_flare']
frp_df.loc[mask, 'cloud_adjusted_sample_counts'] = frp_df.loc[mask, 'sample_counts_flare']
# compute mean frp
frp_df['cloud_adjusted_mean_frp'] = frp_df.frp / frp_df.cloud_adjusted_sample_counts
# get yearly sums
to_group = ['lats_arcmin', 'lons_arcmin']
to_agg = {'cloud_adjusted_mean_frp': np.sum, 'lats': np.mean, 'lons': np.mean}
annual_grouped_df = frp_df.groupby(to_group, as_index=False).agg(to_agg)
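As a sketch of the change suggested above (converting each flare-year to FRE before summing over years); the seconds-per-year constant matches the one used earlier, while the new variable names are assumptions for illustration:
In [ ]:
# sketch: per-flare FRE summed over years, rather than summing the cloud-adjusted mean FRP
seconds_per_year = 3.154e+7
frp_df['yearly_fre'] = frp_df.cloud_adjusted_mean_frp * seconds_per_year / 1000000  # as in the monthly FRE cells above
fre_grouped_df = frp_df.groupby(['lats_arcmin', 'lons_arcmin'], as_index=False).agg(
    {'yearly_fre': np.sum, 'lats': np.mean, 'lons': np.mean})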
In [440]:
annual_grouped_df['frp_pc_total'] = annual_grouped_df.cloud_adjusted_mean_frp / annual_grouped_df.cloud_adjusted_mean_frp.sum()
In [441]:
sorted_annual_grouped_df = annual_grouped_df.sort_values('frp_pc_total', ascending=False)
sorted_annual_grouped_df['frp_pc_cumsum'] = sorted_annual_grouped_df.frp_pc_total.cumsum()
sorted_annual_grouped_df.reset_index(inplace=True, drop=True)
In [790]:
annual_grouped_df[(annual_grouped_df.lats_arcmin==3140) & (annual_grouped_df.lons_arcmin==604)]
Out[790]:
In [792]:
sorted_annual_grouped_df.head(20)
Out[792]:
In [443]:
n_flares_10_pc = np.sum(sorted_annual_grouped_df.frp_pc_cumsum<0.1)
print n_flares_10_pc
n_flares_50_pc = np.sum(sorted_annual_grouped_df.frp_pc_cumsum<0.5)
print n_flares_50_pc
n_flares_90_pc = np.sum(sorted_annual_grouped_df.frp_pc_cumsum<0.9)
print n_flares_90_pc
In [444]:
plt.close('all')
plt.figure(figsize=(9,9))
plt.plot(np.arange(sorted_annual_grouped_df.shape[0]), sorted_annual_grouped_df.frp_pc_total.cumsum(), 'k--',
linewidth=1, label='Cumulative Percentage')
plt.plot([n_flares_10_pc, n_flares_10_pc], [-0.1, 0.1], 'k-', linewidth=0.5, label='10% (88 Flares)')
plt.plot([-1000, n_flares_10_pc], [0.1, 0.1], 'k-', linewidth=1)
plt.plot([n_flares_50_pc, n_flares_50_pc], [-0.1, 0.5], 'r-', linewidth=0.5, label='50% (1311 Flares)')
plt.plot([-1000, n_flares_50_pc], [0.5, 0.5], 'r-', linewidth=1)
plt.plot([n_flares_90_pc, n_flares_90_pc], [-0.1, 0.9], 'b-', linewidth=0.5, label='90% (7868 Flares)')
plt.plot([-1000, n_flares_90_pc], [0.9, 0.9], 'b-', linewidth=1)
plt.legend(loc=4,prop={'size': 12,})
plt.ylim(-0, 1)
plt.xlim(-750, 21000)
plt.xlabel('Flare Count', fontsize=14)
plt.ylabel('CDF', fontsize=14)
plt.savefig(os.path.join(output_path, 'ats_cumulative_frp.png'), bbox_inches='tight', dpi=600)
plt.show()
In [445]:
bin_x = np.arange(-180, 185, 5)
bin_y = np.arange(-90, 95, 5)
bcm = 0.01*annual_grouped_df.cloud_adjusted_mean_frp + 0.52  # convert cloud-adjusted mean FRP to an estimated flared gas volume (BCM) via a linear relationship
binned_data = stats.binned_statistic_2d(annual_grouped_df.lons, annual_grouped_df.lats,
bcm, 'sum',
bins=[bin_x, bin_y])
binned_stat = np.ma.masked_array(binned_data.statistic, binned_data.statistic==0)
In [910]:
def hist_count(df):
bin_x = np.arange(-180, 181, 1)
bin_y = np.arange(-90, 91, 1)
meshed_x, meshed_y = np.meshgrid(bin_x, bin_y)
meshed_x = meshed_x[:-1,:-1] + 0.5
meshed_y = meshed_y[:-1,:-1] + 0.5
binned_data = stats.binned_statistic_2d(df.lons, df.lats,
np.ones(df.lons.size), 'sum',
bins=[bin_x, bin_y]).statistic.T
mask = binned_data == 0
Z = np.ma.masked_array(binned_data, mask)
return meshed_x, meshed_y, Z
def plot_bcm_totals(flare_df, cum_df):
# set up figure
plt.close('all')
fig = plt.figure(figsize=(20,9))
fig.subplots_adjust(hspace=0.001, wspace=0.1)
ax = plt.axes(projection=ccrs.EqualEarth())
ax.set_global()
ax.outline_patch.set_edgecolor('white')
ax.coastlines(color='white', linewidth=0.75)
ax.gridlines(color='white', linewidth=0.75, alpha=0.5)
img_extent = (-180, 180, -180, 180)
ax.imshow(np.tile(np.array([[[0,0,0]]],
dtype=np.uint8), [2, 2, 1]),
origin='upper',
transform=ccrs.PlateCarree(),
extent=img_extent)
X, Y, Z = hist_count(flare_df)
# levels= np.linspace(0, 2, 21)
# cmap = plt.get_cmap('inferno')
# norm = BoundaryNorm(levels, ncolors=cmap.N, clip=True)
# p = ax.pcolormesh(X, Y, Z, cmap=cmap, transform=ccrs.PlateCarree())
img_extent = (-180, 180, -90, 90)
im = ax.imshow(Z, origin='lower', cmap='inferno', norm=LogNorm(vmin=1, vmax=500),
extent=img_extent, transform=ccrs.PlateCarree())
cbar = plt.colorbar(mappable=im, ax=ax).set_label('Log BCM', fontsize=20)
plt.savefig(os.path.join(output_path, 'flare_map.png'), bbox_inches='tight', dpi=600)
plt.show()
In [911]:
plot_bcm_totals(annual_grouped_df, sorted_annual_grouped_df)
In [496]:
def plot_data_totals(df):
# set up figure
plt.close('all')
fig = plt.figure(figsize=(15,20))
ax1 = plt.subplot(1, 1, 1, projection=ccrs.EqualEarth())
# split data into top 10 pc and other 90
max_frp_pc = df.frp_pc_total.max()
top_10pc = df[df.frp_pc_cumsum<=0.1]
bottom_90pc= df[df.frp_pc_cumsum>0.1]
ax_list = [ax1]
for i, ax in enumerate(ax_list):
ax.set_global()
ax.outline_patch.set_edgecolor('black')
ax.coastlines(color='black', linewidth=0.75)
ax.gridlines(color='black', linewidth=0.75, alpha=0.5)
# make main plot
#ax_list[0].text(0.94, 0.92, lab, transform=ax_list[0].transAxes, fontsize=25)
scat = ax_list[0].scatter(top_10pc.lons, top_10pc.lats,
s=(top_10pc.frp_pc_total / max_frp_pc * 10)**3,
edgecolors='k',
facecolor='r',
transform=ccrs.PlateCarree())
scat = ax_list[0].scatter(bottom_90pc.lons, bottom_90pc.lats,
s=(bottom_90pc.frp_pc_total / max_frp_pc * 10)**3,
facecolor='b',
transform=ccrs.PlateCarree())
plt.show()
In [497]:
plot_data_totals(sorted_annual_grouped_df)
In [795]:
sorted_annual_grouped_df.head()
Out[795]:
In [799]:
sorted_annual_grouped_df.cloud_adjusted_mean_frp.sum() * 0.095
Out[799]:
In [ ]:
bcm = 0.01 * global_frp.sum(axis=1) * 1000 + 0.52  # convert global FRP back from GW to MW, then apply the linear FRP-to-BCM relationship
plt.close('all')
plt.bar(bcm.index, bcm.values)
plt.show()
In [ ]:
bcm
In [ ]:
global_frp.tail()
Notes:
ERS-2 gyro failure in January 2001 (months since the gyro failure that are currently available: July to December 2001, July to August 2002, January to June 2003) http://earth.esa.int/workshops/meris_aatsr2008/participants/616/pres_616_Veal.pdf
ATSR-1 launched on ERS-1 in July 1991.
TODO:
In [ ]: